Loading packages
#===============================================================================
#BTC.LineZero.Header.1.1.0
#===============================================================================
#R Markdown environment setup and reporting utility.
#===============================================================================
#RLB.Dependencies:
# knitr, magrittr, pacman, rio, rmarkdown, rmdformats, tibble, yaml
#===============================================================================
#Input for document parameters, libraries, file paths, and options.
#=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=
# Chunk defaults: keep package messages and warnings out of the knitted report.
knitr::opts_chunk$set(message = FALSE, warning = FALSE)
# Project root; FUN.LineZero.Boot() passes it to knitr as root.dir.
# (The earlier duplicate assignments of these settings were dead code: they
# were overwritten immediately, so only the effective values are kept.)
path_working <- "/Users/minsikkim/Dropbox (Partners HealthCare)/Project_SICAS2_microbiome/5_Scripts/MGK/Host_depletion_git"
# Library tree passed to .libPaths() by FUN.LineZero.Boot().
path_library <- "/Library/Frameworks/R.framework/Resources/library"
# Packages attached by FUN.LineZero.Boot() and listed by FUN.LineZero.Report().
# Each package appears once so the environment report has no repeated rows.
str_libraries <- c(
  "readxl", "phyloseq", "tidyverse", "pacman", "yaml", "ggplot2", "vegan",
  "microbiome", "ggpubr", "viridis", "decontam", "gridExtra", "lme4",
  "lmerTest", "writexl", "harrietr", "Maaslin2", "ggtext", "ggpmisc", "gamm4",
  "reshape2", "kableExtra", "knitr", "ggtree", "car"
)
# YAML front matter echoed verbatim by FUN.LineZero.Report().
YAML_header <-
'---
title: "Host-DNA depletion 1: data wrangling"
author: "Minsik Kim"
date: "2032.04.09"
output:
rmdformats::downcute:
downcute_theme: "chaos"
code_folding: hide
fig_width: 6
fig_height: 6
---'
# Seed handed to set.seed() by FUN.LineZero.Boot() (stored as a string).
seed <- "20230330"
#=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=
#Loads libraries, file paths, and other document options.
#=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=
# Prepares the session for the document.
#
# Reads the globals `path_library`, `path_working`, `str_libraries` and `seed`.
# Sets the library search path, attaches the reporting packages and every
# package named in `str_libraries`, sets knitr's root directory and seeds the
# random number generator. Takes no arguments; called for its side effects.
FUN.LineZero.Boot <- function() {
  .libPaths(path_library)
  # library() stops with an error when pacman is missing, whereas require()
  # only warns and returns FALSE.
  library(pacman)
  # p_load() captures the *expressions* given in `...`, so a character vector
  # has to be supplied through `char`; otherwise the whole c(...) call is
  # treated as a single package name.
  pacman::p_load(char = c("knitr", "rmarkdown", "rmdformats", "yaml"))
  knitr::opts_knit$set(root.dir = path_working)
  # Local, de-duplicated copy; the global `str_libraries` is left untouched.
  libraries <- sort(unique(str_libraries))
  pacman::p_load(char = libraries)
  set.seed(seed)
}
FUN.LineZero.Boot()
#=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=
#Outputs R environment report.
#=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=
# Prints a plain-text summary of the session: R version, the version of every
# package named in the global `str_libraries`, operating system, library and
# working paths, seed and the YAML header. Takes no arguments; output goes to
# the console / knitted document via cat().
FUN.LineZero.Report <- function() {
  cat("Line Zero Environment:\n\n")
  cat(paste("R:", pacman::p_version(), "\n"))
  cat("Libraries:\n")
  # One line per package, in the order given by `str_libraries`.
  for (lib_name in str_libraries) {
    lib_version <- pacman::p_version(package = lib_name)
    cat(paste0(" ", lib_name, ": ", lib_version, "\n"))
  }
  cat(paste("\nOperating System:", pacman::p_detectOS(), "\n"))
  cat(paste(" Library Path:", path_library, "\n"))
  cat(paste(" Working Path:", path_working, "\n"))
  cat(paste("Seed:", seed, "\n\n"))
  cat("YAML Header:\n")
  cat(YAML_header)
}
FUN.LineZero.Report()
## Line Zero Environment:
##
## R: 4.2.2
## Libraries:
## readxl: 1.4.2
## phyloseq: 1.40.0
## tidyverse: 2.0.0
## pacman: 0.5.1
## yaml: 2.3.7
## ggplot2: 3.4.1
## vegan: 2.6.4
## microbiome: 1.18.0
## ggpubr: 0.6.0
## viridis: 0.6.2
## decontam: 1.16.0
## gridExtra: 2.3
## ggpubr: 0.6.0
## lme4: 1.1.31
## lmerTest: 3.1.3
## writexl: 1.4.2
## harrietr: 0.2.3
## Maaslin2: 1.10.0
## ggtext: 0.1.2
## ggpmisc: 0.5.2
## gridExtra: 2.3
## gamm4: 0.2.6
## reshape2: 1.4.4
## kableExtra: 1.3.4
## knitr: 1.42
## ggtree: 3.4.4
## car: 3.1.1
##
## Operating System: Darwin
## Library Path: /Library/Frameworks/R.framework/Resources/library
## Working Path: /Users/minsikkim/Dropbox (Partners HealthCare)/Project_SICAS2_microbiome/5_Scripts/MGK/Host_depletion_git
## Seed: 20230330
##
## YAML Header:
## ---
## title: "Host-DNA depletion 1: data wrangling"
## author: "Minsik Kim"
## date: "2032.04.09"
## output:
## rmdformats::downcute:
## downcute_theme: "chaos"
## code_folding: hide
## fig_width: 6
## fig_height: 6
## ---
Script description
1. Loading data
1.1. phyloseq object
1.2. qPCR data (controls)
2. QC
QC1. How many samples failed sequencing
QC2. How were changes in read stats and host DNA proportion?
QC3. How were the extraction controls
QC4. Prevalence / abundance filtering - red flag
3. Analysis
A0. Calculation of alpha-diversity indices
A1. Host DNA, bacterial DNA and % host
A2. Modeling of sequencing results
A3. Taxa alpha diversity
A4. Taxa beta diversity
Intermediate results
A5. DA analysis for taxa
A6. Decontam
A7. LM of function alpha diversity (BPI)
A8. permanova of function alpha diversity
A9. DA for function
Data inputs
Meta data
qPCR - bacteria
qPCR - human
qPCR host %
Raw reads
final reads
sequencing host %
library prep failure status
Raw reads
subject_id
treatment
sample_type
subject_id
Sequencing result
samples
controls
Loading data
# Loading files -----------------------------------------------------------
#loading tidy phyloseq object
# The .rds is read into `phyloseq`; the code below accesses its
# `phyloseq_count` element (and later chunks its `phyloseq_rel` element).
# NOTE(review): the object name shadows the phyloseq package's own
# `phyloseq()` constructor; function calls still resolve to the package.
phyloseq <- read_rds("/Users/minsikkim/Dropbox (Partners HealthCare)/Project_SICAS2_microbiome/4_Data/2_Tidy/Phyloseq/PHY_20221129_MGK_host_tidy_tax.rds")
#sample data loading
# Stores the sample metadata of the count object in a variable that has the
# same name as the accessor function. Later `sample_data(x)` calls still work
# because R skips non-function objects when resolving a call.
sample_data <- sample_data(phyloseq$phyloseq_count)
Q1. How were sequencing results?
Figure - regular scale
Raw scale is not normally distributed
# Initial QC --------------------------------------------------------------
# Questions - QC
# Q0. How many samples failed in sequencing?
## figures ----- raw data
# Histograms of raw reads, host-mapped reads, final reads and host ratio on
# the untransformed scale, one panel per sample type x feature, filled by
# treatment. Rows without a subject_id are dropped first.
sample_data %>%
  subset(., !is.na(.$subject_id)) %>%
  data.frame() %>%
  gather(feature, value, Raw_reads:sequencing_host_prop) %>%
  group_by(feature, sample_type) %>%
  subset(., .$feature %in% c("Raw_reads", "Host_mapped", "Final_reads", "sequencing_host_prop")) %>%
  mutate(feature = factor(feature, levels = c("Raw_reads", "Host_mapped", "Final_reads", "sequencing_host_prop"), labels = c("Raw reads", "Host mapped", "Final reads", "Host ratio"))) %>%
  ggplot(aes(x = value, fill = treatment)) +
  geom_histogram(bins = 97) +
  guides(fill = guide_legend(title = "Treatment", nrow = 1)) +
  facet_grid(sample_type~feature, scales = "free") +
  # x is the untransformed value, so the title must not claim a log10
  # transform (the log10 version is the next figure).
  ggtitle("Raw-scale histogram") +
  theme_classic() +
  theme(legend.position = "top")
Figure - log10 scale
Log transformation is adequate for read counts
Host % is not transformed well
## figures ----- log10
# Same four features as the raw-scale figure, with the host proportion
# expressed in percent and every value shown as log10(value).
sample_data %>%
  subset(., !is.na(.$subject_id)) %>%
  data.frame() %>%
  mutate(
    host_seq_percent = sequencing_host_prop * 100,
    .after = sequencing_host_prop
  ) %>%
  gather(feature, value, Raw_reads:host_seq_percent) %>%
  group_by(feature, sample_type) %>%
  subset(
    .,
    .$feature %in% c("Raw_reads", "Host_mapped", "Final_reads", "host_seq_percent")
  ) %>%
  mutate(
    feature = factor(
      feature,
      levels = c("Raw_reads", "Host_mapped", "Final_reads", "host_seq_percent"),
      labels = c("Raw reads", "Host mapped", "Final reads", "Host %")
    )
  ) %>%
  ggplot(aes(x = log10(value), fill = treatment)) +
  geom_histogram(bins = 97) +
  facet_grid(sample_type ~ feature, scales = "free") +
  labs(title = "log10 transformed") +
  guides(fill = guide_legend(title = "Treatment", nrow = 1)) +
  theme_classic() +
  theme(legend.position = "top")
Figure - scaling host proportion
Raw % will be used for host%
## figures ----- host proportion: raw, log10 and sqrt
# Compares three scalings of the host read proportion side by side so the
# most suitable one for modelling can be chosen.
sample_data %>%
  subset(., !is.na(.$subject_id)) %>%
  data.frame() %>%
  mutate(
    host_seq_percent = sequencing_host_prop,
    log_seq_percent = log10(host_seq_percent),
    sqrt_seq_percent = sqrt(host_seq_percent),
    .after = sequencing_host_prop
  ) %>%
  gather(feature, value, Raw_reads:sqrt_seq_percent) %>%
  group_by(feature, sample_type) %>%
  subset(
    .,
    .$feature %in% c("host_seq_percent", "log_seq_percent", "sqrt_seq_percent")
  ) %>%
  mutate(
    feature = factor(
      feature,
      levels = c("host_seq_percent", "log_seq_percent", "sqrt_seq_percent"),
      labels = c("Host ratio", "log10 (host ratio)", "Sqrt(host ratio)")
    )
  ) %>%
  ggplot(aes(x = value, fill = treatment)) +
  geom_histogram(bins = 97) +
  facet_grid(sample_type ~ feature, scales = "free") +
  labs(title = "Host % transfromed (raw, log10, and sqrt) histrogram") +
  guides(fill = guide_legend(title = "Treatment", nrow = 1)) +
  theme_classic() +
  theme(legend.position = "top")
Figure - log10 scale by treatment
# Final reads on the raw and log10 scale, stacked in one column with a shared
# legend. Both panels differ only in the x aesthetic and the title, so they
# are built by one helper. local() keeps the helper objects out of the global
# environment; the arranged figure is still returned and printed.
local({
  reads_df <- sample_data %>%
    subset(., !is.na(.$subject_id)) %>%
    data.frame()
  treatment_fill <- scale_fill_manual(
    values = c("#e31a1c", "#fb9a99", "#33a02c", "#b2df8a", "#1f78b4", "#a6cee3"),
    name = "Treatment"
  )
  # Histogram of `mapping` per sample type, filled by treatment.
  final_reads_histogram <- function(mapping, title) {
    ggplot(reads_df, mapping) +
      geom_histogram(bins = 97) +
      facet_wrap(~sample_type) +
      theme_classic(base_family = "serif") +
      ggtitle(title) +
      treatment_fill
  }
  ggarrange(
    final_reads_histogram(
      aes(x = Final_reads, fill = treatment),
      "Histogram of final reads by sample type and treatment"
    ),
    final_reads_histogram(
      aes(x = log10(Final_reads), fill = treatment),
      "Histogram of log10(final reads) by sample type and treatment"
    ),
    # TRUE rather than T: T is an ordinary variable that can be reassigned.
    common.legend = TRUE, ncol = 1
  )
})
Histogram (sum of OTU table)
2 samples showed 0 reads in sum(OTU)
# Histogram (100 breaks) of log10(column sums of the count OTU table + 1);
# the +1 keeps columns whose sum is 0 on the plot instead of producing -Inf.
# NOTE(review): column sums are per-sample totals only if taxa are rows in this OTU table - confirm.
hist((log10((phyloseq$phyloseq_count %>% otu_table %>% colSums()) + 1)),100, main = "Histogram of total reads (sum of OTU table)") # 2 samples showed 0 total reads (sum of otu_table)
Final reads by sample type
Some samples did not pass library prep QC, but showed reasonable final reads
# Total reads per sample (column sums of the count OTU table, log10(x + 1)),
# ordered from highest to lowest and coloured by sample type.
sample_data %>% data.frame %>% mutate(total_read = phyloseq$phyloseq_count %>% otu_table %>% colSums()) %>%
  ggplot(aes(x = reorder(baylor_other_id, -total_read),
             y = log10(total_read + 1),
             col = sample_type)) +
  geom_point() +
  theme_classic(base_family = "serif") +
  theme(axis.title.y = element_markdown(), axis.text.x = element_text(angle = 90, vjust = 0.5, hjust = 1, size = 4)) +
  ylab("log<sub>10</sub>(Sum of OTU table reads)") +
  xlab("Sample ID") +
  # Colour is mapped to sample_type in this figure, so the legend is titled
  # accordingly (it previously read "Library failed", copied from the
  # library-failure figure).
  guides(col = guide_legend(title = "Sample type")) +
  ggtitle("Sum of OTU reads by sample type")
Final reads of library prep failed samples
Some samples did not pass library prep QC, but showed reasonable final reads
# How did the samples flagged as failed in library prep perform?
# Per-sample total reads (log10(x + 1)), ordered from highest to lowest and
# coloured by the library-failure flag.
sample_data %>%
  data.frame() %>%
  mutate(total_read = colSums(otu_table(phyloseq$phyloseq_count))) %>%
  ggplot(
    aes(
      x = reorder(baylor_other_id, -total_read),
      y = log10(total_read + 1),
      col = lib_failed
    )
  ) +
  geom_point() +
  theme_classic(base_family = "serif") +
  theme(
    axis.title.y = element_markdown(),
    axis.text.x = element_text(angle = 90, vjust = 0.5, hjust = 1, size = 4)
  ) +
  labs(
    x = "Sample ID",
    y = "log<sub>10</sub>(Sum of OTU table reads)",
    title = "Sum of OTU reads by library failure status"
  ) +
  guides(col = guide_legend(title = "Library failed"))
Raw reads, Mapped reads, host reads, final reads, and sumOTU
Some samples did not pass library prep QC, but showed reasonable final reads
# Read counts per sample at every processing step (raw, low-quality removed,
# trimmed, host-mapped, final, MetaPhlAn-mapped, OTU sum), in long format so
# that each step becomes one colour in the scatter plot.
sample_data %>% data.frame %>%
  mutate(total_read = phyloseq$phyloseq_count %>%
           otu_table %>% colSums()) %>%
  melt(id.vars = c("baylor_other_id"),
       measure.vars = c("Raw_reads", "LowQual_removed", "Reads_after_trim", "Host_mapped", "Final_reads", "Metaphlan_mapped", "total_read"),
       variable.name = "category",
       value.name = "reads") %>%
  mutate(category = factor(category, levels = c("Raw_reads", "LowQual_removed", "Reads_after_trim", "Host_mapped", "Final_reads", "Metaphlan_mapped", "total_read"),
                           labels = c("Raw", "Low qual removed", "Trimmed reads", "Host", "Final", "Metaphlan", "OTU sum"))) %>%
  ggplot(aes(x = reorder(baylor_other_id, -reads),
             y = log10(reads + 1),
             col = category)) +
  geom_point() +
  theme_classic(base_family = "serif") +
  theme(axis.title.y = element_markdown(), axis.text.x = element_text(angle = 90, vjust = 0.5, hjust = 1, size = 4)) +
  ylab("log<sub>10</sub>(reads + 1)") +
  xlab("Sample ID") +
  # Colour is mapped to the processing step here, not to library failure, so
  # the legend title is corrected (it was copied from the previous figure).
  guides(col = guide_legend(title = "Processing step")) +
  ggtitle("Read counts by samples at each data processing step")
List of samples failed in sequencing
2 BAL samples (control and lyPMA group) failed in sequencing
# Lists the metadata rows whose column sum in the count OTU table is exactly 0.
# The logical filter is computed from the OTU table, not from the metadata, so
# it relies on both having the samples in the same order.
sample_data %>% data.frame %>% filter(phyloseq$phyloseq_count %>% otu_table %>% colSums() == 0) # two BAL sampels showed 0 total reads
#sample_data(phyloseq$phyloseq_count) %>% data.frame() %>% subset(., .$lib_failed)
QC 1 Results:
1.1 Modeling of final reads should be conducted with a log transform. Host % needs no transformation.
1.2 13 samples failed in library prep
1.3. Two BAL samples showed 0 total reads
1.4. Sequencing fail ≠ library prep fail
Comments from Baylor:
Q: What was the lab’s criteria for deciding which samples failed library prep.? There were 13 samples that you pointed as failed but their sequencing result actually looks pretty good (ie similar to samples that didn’t fail library prep)
A: To determine whether a library attempt “passed or failed” the lab looks at the picogreen concentrations and a library fragment size distribution trace. The trace files are an output from either the Fragment Analyzer or TapeStation (a copy of the trace files for PQ00331 is attached). If a sample has a background level pico concentration and no discernable fragment concentration on the trace (i.e. a flat trace line) it is considered failed library. If the sample is below the level of detection of our standard library QC methods, it is considered failure. It’s still possible that there is some small amounts of library in those samples that were successfully sequenced, but often those samples do not generate a meaningful amount of sequence data.
QC2 Changes of reads and host % by treatment
For detailed analysis, sequencing matrices were analyzed by each sample type and treatment
Reads and host % by treatment
QC table by treated (binary)
Changes in matrices were observed
#sequencing result by sample type and control (1/0)
# Formats a numeric vector as "median [Q1, Q3]".
#   x          numeric vector to summarise
#   digits     decimals used for rounding and as the minimum shown (nsmall)
#   scientific forwarded to format(); NA is format()'s own default
# Returns a single character string.
format_median_iqr <- function(x, digits = 2, scientific = NA) {
  fmt <- function(value) {
    format(round(value, digits), nsmall = digits, big.mark = ",",
           scientific = scientific)
  }
  paste0(
    fmt(median(x)), " [", fmt(quantile(x, 0.25)), ", ",
    fmt(quantile(x, 0.75)), "]"
  )
}
options(dplyr.summarise.inform = FALSE)
# Median [IQR] of raw, host and final reads (in units of 10^7 reads) and of
# the host proportion (in %) per sample type and treated flag, rendered as an
# HTML table.
sample_data %>%
  data.frame() %>%
  group_by(sample_type, treated) %>%
  summarise(
    N = n(),
    `Raw reads<br>(median [IQR])<br>[reads x 10<sup>7</sup>]` = format_median_iqr(Raw_reads / 1e7),
    `Host reads<br>(median [IQR])<br>[reads x 10<sup>7</sup>]` = format_median_iqr(Host_mapped / 1e7),
    `Host reads proportion<br>(median [IQR])<br>[%]` = format_median_iqr(sequencing_host_prop * 100),
    `Final reads<br>(median [IQR])<br>[reads x 10<sup>7</sup>]` = format_median_iqr(Final_reads / 1e7)
  ) %>%
  rename(`Sample type` = sample_type, Treated = treated) %>%
  data.frame(check.names = FALSE) %>%
  mutate(across(everything(), linebreak)) %>%
  kbl(format = "html", escape = FALSE) %>%
  kable_styling(full_width = FALSE, html_font = "serif")
| Sample type | Treated | N |
Raw reads (median [IQR]) [reads x 10<sup>7</sup>] |
Host reads (median [IQR]) [reads x 10<sup>7</sup>] |
Host reads proportion (median [IQR]) [%] |
Final reads (median [IQR]) [reads x 10<sup>7</sup>] |
|---|---|---|---|---|---|---|
| Neg. | 0 | 6 | 0.20 [0.17, 0.22] | 0.02 [0.01, 0.02] | 16.85 [7.97, 20.09] | 0.08 [0.06, 0.11] |
| Neg. | 1 | 25 | 0.22 [0.17, 0.30] | 0.02 [0.02, 0.03] | 16.70 [14.76, 20.97] | 0.10 [0.08, 0.14] |
| Mock | 0 | 6 | 10.88 [10.36, 11.02] | 0.03 [0.02, 0.03] | 0.30 [0.28, 0.31] | 10.02 [9.60, 10.21] |
| Mock | 1 | 25 | 10.58 [8.16, 11.83] | 0.07 [0.06, 0.07] | 0.64 [0.63, 0.66] | 9.79 [7.29, 10.87] |
| BAL | 0 | 5 | 15.73 [6.35, 15.92] | 12.92 [5.21, 12.94] | 99.72 [99.59, 99.75] | 0.03 [0.03, 0.04] |
| BAL | 1 | 25 | 6.17 [4.57, 17.43] | 4.65 [2.78, 12.80] | 95.83 [87.19, 98.81] | 0.17 [0.10, 0.37] |
| Nasal | 0 | 10 | 13.09 [7.73, 16.93] | 10.05 [6.11, 13.04] | 94.05 [92.82, 97.87] | 0.48 [0.10, 0.87] |
| Nasal | 1 | 25 | 4.08 [0.99, 6.40] | 0.81 [0.26, 1.36] | 32.80 [15.74, 78.71] | 0.97 [0.17, 3.42] |
| Sputum | 0 | 5 | 8.59 [8.25, 9.27] | 6.87 [6.69, 7.50] | 99.19 [98.86, 99.21] | 0.06 [0.06, 0.09] |
| Sputum | 1 | 25 | 12.23 [10.34, 13.73] | 7.71 [3.76, 8.82] | 87.45 [47.33, 92.94] | 1.16 [0.47, 4.19] |
QC table by treatment methods
Changes were sample type * treatment specific
# Formats a numeric vector as "median [Q1, Q3]".
#   x          numeric vector to summarise
#   digits     decimals used for rounding and as the minimum shown (nsmall)
#   scientific forwarded to format(); NA is format()'s own default
# Returns a single character string.
format_median_iqr <- function(x, digits = 2, scientific = NA) {
  fmt <- function(value) {
    format(round(value, digits), nsmall = digits, big.mark = ",",
           scientific = scientific)
  }
  paste0(
    fmt(median(x)), " [", fmt(quantile(x, 0.25)), ", ",
    fmt(quantile(x, 0.75)), "]"
  )
}
# Median [IQR] of raw, host and final reads (in units of 10^7 reads) and of
# the host proportion (in %) per sample type and treatment method, rendered
# as an HTML table.
sample_data %>%
  data.frame() %>%
  #dplyr::filter(sample_type %in% c("Sputum", "nasal_swab", "BAL")) %>%
  group_by(sample_type, treatment) %>%
  summarise(
    N = n(),
    `Raw reads<br>(median [IQR])<br>[reads x 10<sup>7</sup>]` = format_median_iqr(Raw_reads / 1e7),
    `Host reads<br>(median [IQR])<br>[reads x 10<sup>7</sup>]` = format_median_iqr(Host_mapped / 1e7),
    `Host reads proportion<br>(median [IQR])<br>[%]` = format_median_iqr(sequencing_host_prop * 100),
    `Final reads<br>(median [IQR])<br>[reads x 10<sup>7</sup>]` = format_median_iqr(Final_reads / 1e7)
  ) %>%
  data.frame(check.names = FALSE) %>%
  arrange(sample_type, treatment) %>%
  rename(`Sample type` = sample_type, Treatment = treatment) %>%
  mutate(across(everything(), linebreak)) %>%
  kbl(format = "html", escape = FALSE) %>%
  kable_styling(full_width = FALSE, html_font = "serif")
| Sample type | Treatment | N |
Raw reads (median [IQR]) [reads x 10<sup>7</sup>] |
Host reads (median [IQR]) [reads x 10<sup>7</sup>] |
Host reads proportion (median [IQR]) [%] |
Final reads (median [IQR]) [reads x 10<sup>7</sup>] |
|---|---|---|---|---|---|---|
| Neg. | Untreated | 6 | 0.20 [0.17, 0.22] | 0.02 [0.01, 0.02] | 16.85 [7.97, 20.09] | 0.08 [0.06, 0.11] |
| Neg. | lyPMA | 5 | 0.16 [0.15, 0.19] | 0.01 [0.01, 0.01] | 16.55 [14.92, 16.70] | 0.07 [0.07, 0.08] |
| Neg. | Benzonase | 5 | 0.19 [0.17, 0.22] | 0.02 [0.02, 0.05] | 17.49 [15.45, 27.40] | 0.09 [0.08, 0.10] |
| Neg. | Host zero | 5 | 0.29 [0.24, 0.40] | 0.02 [0.02, 0.04] | 13.14 [10.22, 28.16] | 0.13 [0.10, 0.18] |
| Neg. | Molysis | 5 | 0.19 [0.18, 0.22] | 0.03 [0.02, 0.03] | 19.94 [16.61, 19.95] | 0.10 [0.09, 0.13] |
| Neg. | QIAamp | 5 | 0.30 [0.25, 0.31] | 0.02 [0.02, 0.03] | 17.61 [13.26, 20.10] | 0.12 [0.10, 0.14] |
| Mock | Untreated | 6 | 10.88 [10.36, 11.02] | 0.03 [0.02, 0.03] | 0.30 [0.28, 0.31] | 10.02 [9.60, 10.21] |
| Mock | lyPMA | 5 | 2.07 [0.51, 8.23] | 0.09 [0.03, 0.10] | 4.91 [1.42, 7.75] | 1.61 [0.31, 7.29] |
| Mock | Benzonase | 5 | 11.32 [8.01, 11.55] | 0.07 [0.05, 0.07] | 0.64 [0.63, 0.64] | 10.30 [7.27, 10.52] |
| Mock | Host zero | 5 | 10.06 [9.64, 12.26] | 0.06 [0.06, 0.07] | 0.65 [0.63, 0.65] | 9.15 [8.81, 11.25] |
| Mock | Molysis | 5 | 10.53 [8.16, 10.58] | 0.06 [0.05, 0.06] | 0.64 [0.64, 0.65] | 9.58 [7.45, 9.79] |
| Mock | QIAamp | 5 | 11.97 [11.83, 13.59] | 0.07 [0.07, 0.07] | 0.64 [0.61, 0.64] | 11.04 [10.87, 12.32] |
| BAL | Untreated | 5 | 15.73 [6.35, 15.92] | 12.92 [5.21, 12.94] | 99.72 [99.59, 99.75] | 0.03 [0.03, 0.04] |
| BAL | lyPMA | 5 | 5.72 [3.59, 13.41] | 4.65 [2.79, 10.90] | 99.08 [97.84, 99.46] | 0.06 [0.04, 0.10] |
| BAL | Benzonase | 5 | 18.59 [16.20, 23.63] | 14.77 [12.80, 18.16] | 98.81 [98.72, 98.92] | 0.17 [0.16, 0.22] |
| BAL | Host zero | 5 | 4.57 [2.32, 4.71] | 2.69 [1.61, 2.93] | 83.65 [76.75, 87.19] | 0.24 [0.13, 0.82] |
| BAL | Molysis | 5 | 4.76 [3.57, 4.86] | 2.78 [1.39, 3.61] | 92.52 [92.48, 93.61] | 0.29 [0.13, 1.56] |
| BAL | QIAamp | 5 | 17.19 [15.35, 17.43] | 11.87 [10.79, 12.22] | 98.35 [92.28, 98.57] | 0.26 [0.10, 1.02] |
| Nasal | Untreated | 10 | 13.09 [7.73, 16.93] | 10.05 [6.11, 13.04] | 94.05 [92.82, 97.87] | 0.48 [0.10, 0.87] |
| Nasal | lyPMA | 5 | 0.98 [0.85, 1.24] | 0.63 [0.28, 0.88] | 91.25 [35.63, 91.64] | 0.07 [0.06, 0.08] |
| Nasal | Benzonase | 5 | 5.75 [4.95, 6.57] | 3.66 [1.29, 5.05] | 78.71 [77.84, 94.79] | 0.28 [0.26, 1.04] |
| Nasal | Host zero | 5 | 2.83 [1.42, 6.42] | 0.49 [0.03, 0.81] | 8.92 [2.70, 30.39] | 2.43 [0.97, 5.03] |
| Nasal | Molysis | 5 | 0.99 [0.63, 4.08] | 0.42 [0.06, 0.64] | 49.94 [5.04, 78.44] | 0.32 [0.17, 2.53] |
| Nasal | QIAamp | 5 | 6.40 [6.40, 6.80] | 0.86 [0.86, 1.17] | 20.06 [15.74, 23.21] | 4.63 [4.50, 4.67] |
| Sputum | Untreated | 5 | 8.59 [8.25, 9.27] | 6.87 [6.69, 7.50] | 99.19 [98.86, 99.21] | 0.06 [0.06, 0.09] |
| Sputum | lyPMA | 5 | 10.98 [5.22, 12.78] | 8.82 [3.76, 10.44] | 96.38 [92.54, 98.28] | 0.25 [0.15, 0.44] |
| Sputum | Benzonase | 5 | 10.76 [10.34, 10.82] | 7.81 [7.75, 8.24] | 94.19 [92.94, 94.47] | 0.47 [0.45, 0.59] |
| Sputum | Host zero | 5 | 13.14 [7.64, 13.95] | 4.39 [3.80, 7.71] | 61.67 [37.50, 68.00] | 2.91 [2.36, 3.67] |
| Sputum | Molysis | 5 | 12.59 [10.84, 13.73] | 2.98 [1.83, 4.28] | 32.79 [17.02, 33.83] | 6.11 [5.56, 8.37] |
| Sputum | QIAamp | 5 | 12.35 [12.23, 12.85] | 9.08 [8.41, 9.27] | 88.18 [68.85, 88.64] | 1.16 [1.13, 3.89] |
Figure of reads by treatment (z-score)
Changes were sample type * treatment specific
# Summary figures - facet and z-score -------------------------------------
# Box plots of the four sequencing metrics by treatment. Values are scaled
# with scale() after grouping by feature and sample type, which puts metrics
# with very different ranges on a common axis.
sample_data %>%
  subset(., !is.na(.$subject_id)) %>%
  data.frame() %>%
  gather(feature, value, Raw_reads:sequencing_host_prop) %>%
  group_by(feature, sample_type) %>%
  subset(
    .,
    .$feature %in% c("Raw_reads", "Host_mapped", "Final_reads", "sequencing_host_prop")
  ) %>%
  mutate(
    z_score = scale(value),
    feature = factor(
      feature,
      levels = c("Raw_reads", "Host_mapped", "Final_reads", "sequencing_host_prop"),
      labels = c("Raw reads", "Host mapped", "Final reads", "Host %")
    )
  ) %>%
  ggplot(aes(x = treatment, y = z_score, fill = treatment)) +
  geom_boxplot(lwd = 0.2) +
  guides(fill = guide_legend(title = "Treatment", nrow = 1)) +
  facet_grid(sample_type ~ feature) +
  labs(x = "Treatment", y = "Z score") +
  theme_classic(base_family = "serif", base_size = 14) +
  guides(x = guide_axis(angle = 90)) +
  theme(legend.position = "top") +
  # colours from https://colorbrewer2.org/#type=qualitative&scheme=Set1&n=6
  scale_fill_manual(
    values = c("#e31a1c", "#fb9a99", "#33a02c", "#b2df8a", "#1f78b4", "#a6cee3"),
    name = "Treatment"
  )
Results:
2.1. There were no differences in raw reads.
2.2. However, final reads increased after some treatment, and host DNA proportion decreased
QC3. Positive and negative controls
Positive and negative controls were compared with mock community
Reads and host % by treatment
Species richness of controls
Some possible contaminants were identified in extraction controls
#Loading theoretical mock community
# The first spreadsheet column becomes the row names (row.names = 1), the
# `Mock` column is renamed and divided by 100, and the result is combined with
# the taxonomy table of the count object.
zymo_mock <- read_excel("/Users/minsikkim/Dropbox (Partners HealthCare)/@minsik/project_sicas2/data_raw/DAR_20210929_zymo_mock_data.xlsx") %>%
  data.frame(row.names = 1) %>% rename(mock_theoretical = Mock) %>% mutate(mock_theoretical = mock_theoretical/100) %>%
  merge_phyloseq(otu_table(., taxa_are_rows = TRUE), tax_table(phyloseq$phyloseq_count))
# One-row sample_data for the theoretical mock community (sample type
# "Mock theoretical", treatment "-"), merged with its abundances.
phyloseq_mock <- rbind(c("mock_theoretical", "Mock theoretical", "-")) %>% data.frame() %>%
  column_to_rownames(var = "X1") %>% rename(sample_type = X2, treatment = X3) %>% #making sample_data of mock community
  merge_phyloseq(sample_data(.), zymo_mock)
phyloseq_control_rel <- subset_samples(phyloseq$phyloseq_rel, sample_type == "Mock" | sample_type == "Neg.") #adding data of controls
# Both columns are converted to character before merging with the theoretical
# mock sample, whose values ("Mock theoretical", "-") are plain strings.
sample_data(phyloseq_control_rel)$treatment <- sample_data(phyloseq_control_rel)$treatment %>% as.character()
sample_data(phyloseq_control_rel)$sample_type <- sample_data(phyloseq_control_rel)$sample_type %>% as.character()
phyloseq_control_rel <- merge_phyloseq(phyloseq_control_rel, phyloseq_mock)
#Species richness of each control groups
# S.obs = number of non-zero entries per row of the transposed OTU table.
# NOTE(review): this is per-sample richness only if taxa are rows - confirm.
sample_data(phyloseq_control_rel)$S.obs <- rowSums(t(otu_table(phyloseq_control_rel)) != 0)
sample_data(phyloseq_control_rel)$sample_type <-
  factor(sample_data(phyloseq_control_rel)$sample_type, levels = c("Mock theoretical", "Mock", "Neg."))
# Fixed: this previously assigned to a misspelled `teratment` column, so
# `treatment` stayed character and the table below was ordered alphabetically
# instead of in the intended treatment order.
sample_data(phyloseq_control_rel)$treatment <-
  factor(sample_data(phyloseq_control_rel)$treatment, levels = c("-", "Untreated", "lyPMA", "Benzonase", "Host zero", "Molysis", "QIAamp"))
phyloseq_control_rel %>%
  sample_data() %>%
  #mutate(sample_type = factor(sample_type, levels = c("Mock", "Neg.")),
  #       treatment = factor(treatment, levels = c("Theoretical", "Untreated", "Benzonase", "Host zero", "Molysis", "QIAamp"))) %>%
  group_by(sample_type, treatment) %>%
  summarise(Mean = mean(S.obs),
            SD = sd(S.obs)) %>%
  kbl(format = "html", caption = "Species richness of controls") %>%
  kable_styling(full_width = FALSE, html_font = "serif")
| sample_type | treatment | Mean | SD |
|---|---|---|---|
| Mock theoretical |
|
10.000000 | NA |
| Mock | Benzonase | 24.400000 | 1.1401754 |
| Mock | Host zero | 27.000000 | 7.8421936 |
| Mock | Molysis | 28.400000 | 1.1401754 |
| Mock | QIAamp | 25.800000 | 0.4472136 |
| Mock | Untreated | 41.333333 | 1.5055453 |
| Mock | lyPMA | 39.800000 | 15.9279628 |
| Neg. | Benzonase | 6.800000 | 2.3874673 |
| Neg. | Host zero | 9.400000 | 2.1908902 |
| Neg. | Molysis | 8.200000 | 3.3466401 |
| Neg. | QIAamp | 9.200000 | 3.4205263 |
| Neg. | Untreated | 8.333333 | 7.9162281 |
| Neg. | lyPMA | 11.000000 | 12.3490890 |
Bar plot of controls
Some possible contaminants were identified in extraction controls
Some changes are visible in the positive control…
#Manipulating phyloseq - only top 10
# Adds a `species20` label column to the taxonomy table: the species name for
# the 10 taxa with the largest taxa_sums(), "[Others]" for everything else.
# The labels are then cleaned ("s__" removed, "_" -> " ") and wrapped in <i>
# so element_markdown() renders them in italics in the legend.
tax_table(phyloseq_control_rel) %>%
  cbind(species20 = "[Others]") %>%
  {
    top10_species <- head(taxa_sums(phyloseq_control_rel) %>%
                            data.frame %>%
                            arrange(-.) %>%
                            row.names(), 10)
    .[top10_species, "species20"] <- as.character(.[top10_species, "Species"])
    # Address the label column by name. The previous positional index (8) did
    # not match the appended column, which a later chunk addresses as column 9,
    # so the plotted labels were never cleaned or italicised.
    .[, "species20"] <- .[, "species20"] %>% gsub("s__", "", .) %>% gsub("_", " ", .) %>% paste("<i>", ., "</i>", sep = "")
    phyloseq_temp <- phyloseq_control_rel
    tax_table(phyloseq_temp) <- tax_table(.)
    phyloseq_temp
  } %>%
  plot_bar(., fill = "species20") +
  ylab("Relative abundancne") +
  theme_classic(base_size = 11, base_family = "serif") +
  ggtitle("Bar plot of control data") +
  theme(legend.text = element_markdown()) +
  guides(fill = guide_legend(title = "Top 10 species")) +
  facet_wrap(~ sample_type, scales = "free_x", nrow = 1)
#there could be opportunistic pathogens...
Bar plot of controls (Positive)
Some possible contaminants were identified in extraction controls
Gram negatives were fragile to depletion methods in the positive control
#Manipulating phyloseq - only top 10
# Bar plot of the mock (positive control) samples with S.obs != 0, filled by
# the 10 taxa with the largest taxa_sums() in those samples; all other taxa
# are labelled "[Others]". One facet per treatment, in the fixed order below.
phyloseq_control_rel %>%
  subset_samples(., sample_type == "Mock") %>%
  tax_table() %>%
  cbind(species20 = "[Others]") %>%
  {
    top10_species <- head(taxa_sums(subset_samples(phyloseq_control_rel,sample_type == "Mock" & S.obs != 0)) %>%
                            data.frame %>%
                            arrange(-.) %>%
                            row.names(), 10)
    .[top10_species, "species20"] <- as.character(.[top10_species, "Species"])
    # Address the label column by name. The previous positional index (8) did
    # not match the appended column, which a later chunk addresses as column 9,
    # so the plotted labels were never cleaned or italicised.
    .[, "species20"] <- .[, "species20"] %>% gsub("s__", "", .) %>% gsub("_", " ", .) %>% paste("<i>", ., "</i>", sep = "")
    phyloseq_temp <- subset_samples(phyloseq_control_rel,sample_type == "Mock" & S.obs != 0)
    tax_table(phyloseq_temp) <- tax_table(.)
    phyloseq_temp
  } %>%
  plot_bar(., fill = "species20") +
  ylab("Relative abundancne") +
  theme_classic(base_size = 11, base_family = "serif") +
  ggtitle("Postive controls") +
  theme(legend.text = element_markdown()) +
  guides(fill = guide_legend(title = "Top 10 species")) +
  facet_wrap(~ factor(treatment, levels = c("Untreated", "lyPMA", "Benzonase", "Host zero", "Molysis", "QIAamp")),
             scales = "free_x", nrow = 1)
#there could be opportunistic pathogens...
Issue 1 - gram negative
Benzonase and Host zero depleted all the gram negative strains
Others decreased gram negatives a lot, but not to zero
# gram stain data
# Same construction as the positive-control bar plot (mock samples with
# S.obs != 0, taxonomy table extended with a species20 label column), but the
# bars are filled by the taxonomy column "Gram" instead of by species.
phyloseq_control_rel %>%
subset_samples(., sample_type == "Mock") %>%
tax_table() %>%
cbind(species20 = "[Others]") %>%
{top20species <- head(taxa_sums(subset_samples(phyloseq_control_rel,sample_type == "Mock" & S.obs != 0)) %>%
data.frame %>%
arrange(-.) %>%
row.names(), 10)
# Label the 10 most abundant taxa with their species name (the variable says
# "top20" but head() keeps 10).
.[top20species, "species20"] <- as.character(.[top20species, "Species"])
# NOTE(review): column 8 is addressed by position. A later chunk uses column 9
# for the appended species20 column, so 8 is presumably the column before it
# (possibly "Gram", which is the fill used here) - confirm which column is meant.
.[, 8] <- .[, 8] %>% gsub("s__", "", .) %>% gsub("_", " ", .) %>% paste("<i>", ., "</i>", sep = "")
phyloseq_temp <- subset_samples(phyloseq_control_rel,sample_type == "Mock" & S.obs != 0)
tax_table(phyloseq_temp) <- tax_table(.)
phyloseq_temp
} %>%
plot_bar(., fill="Gram") +
ylab("Relative abundancne") +
theme_classic(base_size = 11, base_family = "serif") +
ggtitle("Gram stain in Zymo mock") +
theme(legend.text = element_markdown()) +
guides(fill=guide_legend(title="Gram-stain")) +
facet_wrap (~ factor(treatment, levels = c("Untreated", "lyPMA", "Benzonase", "Host zero", "Molysis", "QIAamp")),
scales= "free_x", nrow=1)
#there could be opportunistic pathogens...
#Manipulating phyloseq - only top 10
# Formats a numeric vector as "median [Q1, Q3]".
#   x          numeric vector to summarise
#   digits     decimals used for rounding and as the minimum shown (nsmall)
#   scientific forwarded to format(); NA is format()'s own default
# Returns a single character string.
format_median_iqr <- function(x, digits = 2, scientific = NA) {
  fmt <- function(value) {
    format(round(value, digits), nsmall = digits, big.mark = ",",
           scientific = scientific)
  }
  paste0(
    fmt(median(x)), " [", fmt(quantile(x, 0.25)), ", ",
    fmt(quantile(x, 0.75)), "]"
  )
}
# Appends the relative abundances of the 10 most abundant mock taxa to the
# sample metadata, one column per taxon, so they can be summarised per group.
sample_data(phyloseq_control_rel) <- cbind(phyloseq_control_rel %>%
                                             sample_data %>%
                                             data.frame(),
                                           phyloseq_control_rel %>%
                                             otu_table %>%
                                             data.frame %>%
                                             subset(., rownames(.) %in% head(taxa_sums(subset_samples(phyloseq_control_rel,sample_type == "Mock" & S.obs != 0)) %>%
                                                                               data.frame %>%
                                                                               arrange(-.) %>%
                                                                               row.names(), 10)) %>%
                                             t()
)
# Median [IQR] relative abundance of the three gram-negative mock species per
# sample type and treatment. scientific = FALSE keeps small values in fixed
# notation ("0.0005"); format() otherwise prints them as "5e-04".
sample_data(phyloseq_control_rel) %>%
  data.frame() %>%
  subset(., !is.na(.$Escherichia_coli)) %>%
  group_by(sample_type, treatment) %>%
  summarise(
    N = n(),
    `<i>Escherichia coli</i><br>(median [IQR])` = format_median_iqr(Escherichia_coli, digits = 4, scientific = FALSE),
    `<i>Pseudomonas aeruginosa</i><br>(median [IQR])` = format_median_iqr(Pseudomonas_aeruginosa_group, digits = 4, scientific = FALSE),
    `<i>Salmonella enterica</i><br>(median [IQR])` = format_median_iqr(Salmonella_enterica, digits = 4, scientific = FALSE)
  ) %>%
  rename(`Sample type` = sample_type) %>%
  data.frame(check.names = FALSE) %>%
  mutate(across(everything(), linebreak)) %>%
  kbl(format = "html", escape = FALSE) %>%
  kable_styling(full_width = FALSE, html_font = "serif")
| Sample type | treatment | N |
Escherichia coli (median [IQR]) |
Pseudomonas aeruginosa (median [IQR]) |
Salmonella enterica (median [IQR]) |
|---|---|---|---|---|---|
| Mock theoretical |
|
1 | 0.1200 [0.1200, 0.1200] | 0.1200 [0.1200, 0.1200] | 0.1200 [0.1200, 0.1200] |
| Mock | Benzonase | 5 | 0.0000 [0.0000, 0.0000] | 0.0000 [0.0000, 0.0000] | 0.0000 [0.0000, 0.0000] |
| Mock | Host zero | 5 | 0.0000 [0.0000, 0.0000] | 0.0000 [0.0000, 0.0000] | 0.0000 [0.0000, 0.0000] |
| Mock | Molysis | 5 | 0.0029 [0.0022, 0.0044] | 5e-04 [2e-04, 6e-04] | 0.0031 [0.0022, 0.0036] |
| Mock | QIAamp | 5 | 5e-04 [5e-04, 0.0010] | 1e-04 [1e-04, 1e-04] | 5e-04 [5e-04, 7e-04] |
| Mock | Untreated | 6 | 0.3118 [0.2964, 0.3231] | 0.0791 [0.0771, 0.0810] | 0.2405 [0.2314, 0.2467] |
| Mock | lyPMA | 5 | 0.1514 [0.1427, 0.2281] | 0.0477 [0.0443, 0.0576] | 0.1168 [0.1035, 0.1840] |
| Neg. | Benzonase | 5 | 0.0000 [0.0000, 0.0000] | 0.0000 [0.0000, 0.0000] | 0.0000 [0.0000, 0.0000] |
| Neg. | Host zero | 5 | 0.0000 [0.0000, 0.0000] | 0.0000 [0.0000, 0.0000] | 0.0000 [0.0000, 0.0000] |
| Neg. | Molysis | 5 | 0.0000 [0.0000, 0.0000] | 0.0000 [0.0000, 0.0000] | 0.0000 [0.0000, 0.0000] |
| Neg. | QIAamp | 5 | 0.0000 [0.0000, 0.0000] | 0.0000 [0.0000, 0.0000] | 0.0000 [0.0000, 0.0000] |
| Neg. | Untreated | 6 | 0.0000 [0.0000, 0.0000] | 0.0000 [0.0000, 0.0000] | 0.0000 [0.0000, 0.0000] |
| Neg. | lyPMA | 5 | 0.0000 [0.0000, 0.0000] | 0.0000 [0.0000, 0.0000] | 0.0000 [0.0000, 0.0000] |
Issue 2 - Positive controls contaminants
Some possible contaminants were identified in most of samples
This could be 1) background contamination or 2) cross-contamination from kingfisher. Most of these are gram positives. Negative controls should be double-checked
# Build the "contaminant" view of the control samples.
# Step 1: drop the ten taxa with the largest summed abundance across the
# non-empty Mock samples (presumably the expected mock-community members —
# TODO confirm); whatever is left is treated as a candidate contaminant.
phyloseq_control_rel_contam <- subset_taxa(
  phyloseq_control_rel,
  !(taxa_names(phyloseq_control_rel) %in% (
    subset_samples(phyloseq_control_rel, sample_type == "Mock" & S.obs != 0) %>%
      taxa_sums() %>%
      data.frame %>%
      arrange(-.) %>%
      row.names() %>%
      head(10)
  ))
)
# Step 2: discard taxa whose summed abundance over the remaining data is zero.
phyloseq_control_rel_contam <- subset_taxa(
  phyloseq_control_rel_contam,
  taxa_sums(phyloseq_control_rel_contam) != 0
)
# Step 3: keep only non-"Neg." samples with at least one observed species.
phyloseq_control_rel_contam <- subset_samples(
  phyloseq_control_rel_contam,
  sample_type != "Neg." & S.obs != 0
)
# Stacked bar plot of the remaining taxa in the mock samples, one facet per
# treatment. A helper column `species20` carries the species name of the ten
# most abundant remaining taxa and "[Others]" for everything else.
tax_table(phyloseq_control_rel_contam) %>%
  cbind(species20 = "[Others]") %>%
  {
    # Names assigned inside this block are kept unchanged: with magrittr >= 2.0
    # they may remain visible after the pipe, and later code could rely on them.
    top20species <- head(taxa_sums(phyloseq_control_rel_contam) %>%
                           data.frame %>%
                           arrange(-.) %>%
                           row.names(), 10)
    .[top20species, "species20"] <- as.character(.[top20species, "Species"])
    # Column 9 is presumably `species20` (the column appended above) — TODO
    # confirm; strips the "s__" prefix, replaces "_" by spaces, wraps in <i>.
    .[, 9] <- .[, 9] %>% gsub("s__", "", .) %>% gsub("_", " ", .) %>% paste("<i>", ., "</i>", sep = "")
    phyloseq_temp <- phyloseq_control_rel_contam
    tax_table(phyloseq_temp) <- tax_table(.)
    phyloseq_temp
  } %>%
  plot_bar(., fill = "species20") +
  ylab("Relative abundance") +  # fixed label typo ("abundancne")
  theme_classic(base_size = 11, base_family = "serif") +
  ggtitle("Contaminants in Zymo mock") +
  theme(legend.text = element_markdown()) +  # render the <i>…</i> markup
  guides(fill = guide_legend(title = "Top 10 species")) +
  facet_wrap(~ factor(treatment, levels = c("Untreated", "lyPMA", "Benzonase", "Host zero", "Molysis", "QIAamp")),
             scales = "free_x", nrow = 1)
Negative controls
Contaminants of positive and negative control do not match
It seems without host DNA, gram-negatives are vulnerable to depletion methods.
These negative-control contaminants were most likely introduced after depletion
# Stacked bar plot of the negative controls, one facet per treatment.
# `species20` carries the species name of the ten taxa with the largest summed
# abundance over all control samples and "[Others]" for everything else.
tax_table(phyloseq_control_rel) %>%
  cbind(species20 = "[Others]") %>%
  {
    # Names assigned inside this block are kept unchanged: with magrittr >= 2.0
    # they may remain visible after the pipe, and later code could rely on them.
    top20species <- head(taxa_sums(phyloseq_control_rel) %>%
                           data.frame %>%
                           arrange(-.) %>%
                           row.names(), 10)
    .[top20species, "species20"] <- as.character(.[top20species, "Species"])
    # Column 9 is presumably `species20` (the column appended above) — TODO
    # confirm; strips the "s__" prefix, replaces "_" by spaces, wraps in <i>.
    .[, 9] <- .[, 9] %>% gsub("s__", "", .) %>% gsub("_", " ", .) %>% paste("<i>", ., "</i>", sep = "")
    phyloseq_temp <- phyloseq_control_rel
    tax_table(phyloseq_temp) <- tax_table(.)
    phyloseq_temp
  } %>%
  subset_samples(., sample_type == "Neg.") %>%
  plot_bar(., fill = "species20") +
  ylab("Relative abundance") +  # fixed label typo ("abundancne")
  theme_classic(base_size = 11, base_family = "serif") +
  ggtitle("Barplot of neg. data") +
  theme(legend.text = element_markdown()) +  # render the <i>…</i> markup
  guides(fill = guide_legend(title = "Top 10 species")) +
  facet_wrap(~ factor(treatment, levels = c("Untreated", "lyPMA", "Benzonase", "Host zero", "Molysis", "QIAamp")),
             scales = "free_x", nrow = 1)
# Stacked bar plot of the negative controls coloured by the `Gram` column of
# the taxonomy table, one facet per treatment.
phyloseq_control_rel %>%
  subset_samples(., sample_type == "Neg.") %>%
  plot_bar(., fill = "Gram") +
  ylab("Relative abundance") +  # fixed label typo ("abundancne")
  theme_classic(base_size = 11, base_family = "serif") +
  ggtitle("Gram-stain of negative data") +
  theme(legend.text = element_markdown()) +
  # The fill is the Gram column, so the legend must not be titled
  # "Top 10 species" (copy-paste from the species plots).
  guides(fill = guide_legend(title = "Gram stain")) +
  facet_wrap(~ factor(treatment, levels = c("Untreated", "lyPMA", "Benzonase", "Host zero", "Molysis", "QIAamp")),
             scales = "free_x", nrow = 1)
Samples - gram-stain?
Nasal swab had low gram positive/pattern consisted after depletion
BAL showed Similar gram - / + ratio
Sputum showed high decrease in gram negative bacteria
Freeze/thaw cycle could be associated
Currently no further analysis is possible
# Gram-stain composition of the clinical samples: one stacked bar plot per
# sample type (Nasal, BAL, Sputum), faceted by treatment and coloured by the
# `Gram` column of the taxonomy table.
#
# Each pipeline still builds the `species20` helper column exactly as before,
# although the plots below only use `Gram` for the fill. Names assigned inside
# the `{ }` blocks (`top20species`, `phyloseq_temp`) are kept unchanged: with
# magrittr >= 2.0 they may remain visible after the pipe.
# Column 9 is presumably `species20` — TODO confirm.

# --- Nasal ---
tax_table(phyloseq$phyloseq_rel) %>%
  cbind(species20 = "[Others]") %>%
  {
    top20species <- head(taxa_sums(phyloseq$phyloseq_rel) %>%
                           data.frame %>%
                           arrange(-.) %>%
                           row.names(), 10)
    .[top20species, "species20"] <- as.character(.[top20species, "Species"])
    .[, 9] <- .[, 9] %>% gsub("s__", "", .) %>% gsub("_", " ", .) %>% paste("<i>", ., "</i>", sep = "")
    phyloseq_temp <- phyloseq$phyloseq_rel
    tax_table(phyloseq_temp) <- tax_table(.)
    phyloseq_temp
  } %>%
  subset_samples(., sample_type == "Nasal") %>%
  plot_bar(., fill = "Gram") +
  ylab("Relative abundance") +  # fixed label typo ("abundancne")
  theme_classic(base_size = 11, base_family = "serif") +
  ggtitle("Gram stain of nasal samples") +
  theme(legend.text = element_markdown()) +
  guides(fill = guide_legend(title = "Gram stain")) +  # was "Top 10 species"
  facet_wrap(~ factor(treatment, levels = c("Untreated", "lyPMA", "Benzonase", "Host zero", "Molysis", "QIAamp")),
             scales = "free_x", nrow = 1)

# --- BAL ---
tax_table(phyloseq$phyloseq_rel) %>%
  cbind(species20 = "[Others]") %>%
  {
    top20species <- head(taxa_sums(phyloseq$phyloseq_rel) %>%
                           data.frame %>%
                           arrange(-.) %>%
                           row.names(), 10)
    .[top20species, "species20"] <- as.character(.[top20species, "Species"])
    .[, 9] <- .[, 9] %>% gsub("s__", "", .) %>% gsub("_", " ", .) %>% paste("<i>", ., "</i>", sep = "")
    phyloseq_temp <- phyloseq$phyloseq_rel
    tax_table(phyloseq_temp) <- tax_table(.)
    phyloseq_temp
  } %>%
  subset_samples(., sample_type == "BAL") %>%
  plot_bar(., fill = "Gram") +
  ylab("Relative abundance") +  # fixed label typo ("abundancne")
  theme_classic(base_size = 11, base_family = "serif") +
  ggtitle("Gram stain of BAL samples") +
  theme(legend.text = element_markdown()) +
  guides(fill = guide_legend(title = "Gram stain")) +  # was "Top 10 species"
  facet_wrap(~ factor(treatment, levels = c("Untreated", "lyPMA", "Benzonase", "Host zero", "Molysis", "QIAamp")),
             scales = "free_x", nrow = 1)

# --- Sputum ---
tax_table(phyloseq$phyloseq_rel) %>%
  cbind(species20 = "[Others]") %>%
  {
    top20species <- head(taxa_sums(phyloseq$phyloseq_rel) %>%
                           data.frame %>%
                           arrange(-.) %>%
                           row.names(), 10)
    .[top20species, "species20"] <- as.character(.[top20species, "Species"])
    .[, 9] <- .[, 9] %>% gsub("s__", "", .) %>% gsub("_", " ", .) %>% paste("<i>", ., "</i>", sep = "")
    phyloseq_temp <- phyloseq$phyloseq_rel
    tax_table(phyloseq_temp) <- tax_table(.)
    phyloseq_temp
  } %>%
  subset_samples(., sample_type == "Sputum") %>%
  plot_bar(., fill = "Gram") +
  ylab("Relative abundance") +  # fixed label typo ("abundancne")
  theme_classic(base_size = 11, base_family = "serif") +
  ggtitle("Gram stain of sputum samples") +
  theme(legend.text = element_markdown()) +
  guides(fill = guide_legend(title = "Gram stain")) +  # was "Top 10 species"
  facet_wrap(~ factor(treatment, levels = c("Untreated", "lyPMA", "Benzonase", "Host zero", "Molysis", "QIAamp")),
             scales = "free_x", nrow = 1)
Results
2.3.1. Negative control showed minimal number of possible contaminants
2.3.2. Positive control contained various contaminants
QC4. Prevalence and abundance filtering - red flag
Taxa prevalence and abundance were checked.
Taxa abundance and prevalence
Histogram of taxa prevalence
No prevalence or abundance filtering (each experimental group is 5% of total sample)
# Per-taxon QC table across all samples:
#   species       taxon identifiers taken from the relative-abundance table
#   prevalence    number of samples with a count > 0 (from the count table)
#   mean_rel_abd  mean relative abundance (NA values ignored)
# (assumes both OTU tables list the taxa in the same order — TODO confirm)
#
# No prevalence or abundance filtering is applied in the initial analysis; it
# may be considered for secondary differential-abundance analyses to manage the
# p (features) > n (sample size) problem and multiple-hypothesis correction.
taxa_qc <- data.frame(
  "species" = colnames(t(otu_table(phyloseq$phyloseq_rel))),
  "prevalence" = colSums(t(ifelse(otu_table(phyloseq$phyloseq_count) > 0, 1, 0))),
  "mean_rel_abd" = colMeans(t(otu_table(phyloseq$phyloseq_rel)), na.rm = TRUE)
)
# Distribution of taxa prevalence on a log10 scale.
hist(
  log10(taxa_qc$prevalence),
  main = "Histogram of prevalence of taxa",
  xlab = "log10(Taxa prevalence)"
)
Histogram of mean abundance
# Distribution of the per-taxon mean relative abundance on a log10 scale.
with(
  taxa_qc,
  hist(
    log10(mean_rel_abd),
    main = "Histogram of mean relative abundance",
    xlab = "log10(Mean relative abundance)"
  )
)
Red flag taxa
Taxa with low prevalences were red-flagged
# Flag (1/0) taxa that are both rare and low-abundance:
#   prevalence below 5 % of the number of samples, AND
#   mean relative abundance below the 75th percentile over all taxa.
red_flag_taxa <- data.frame(
  species = taxa_qc$species,
  red_flag_prev_abd = ifelse(
    (taxa_qc$prevalence <
       length(rownames(t(otu_table(phyloseq$phyloseq_rel)))) * 0.05) &
      (taxa_qc$mean_rel_abd < quantile(taxa_qc$mean_rel_abd, 0.75)),
    1,
    0
  )
)
red_flag_taxa
QC 3 results:
3.1. In the initial analysis we will not perform prevalence or abundance filtering (though we may consider this for secondary differential abundance analyses to manage the p (features) > n (sample size) problem and issues with multiple hypothesis correction)
3.2. Red flags were made for taxa not satisfying the criteria (prev < 0.05 & mean rel < 0.75Q)
3.3. Although we don’t consider the prevalence of abundance at this time, we can consider their red-flags after running the DA analysis
Analysis
Before analyzing, alpha diversity indices were calculated for all phyloseq objects
# Compute per-sample alpha-diversity indices for a phyloseq object and append
# them to its sample metadata.
#
# data: a phyloseq object; otu_table(data) and sample_data(data) are read.
#       (assumes samples are the columns of the OTU table — TODO confirm)
# Returns (invisibly) a data.frame: the sample metadata merged by row name with
# S.obs, data_shannon, data_hill, data_invsimpson, data_evenness and the
# columns produced by microbiome::dominance(index = "all").
alpha_diversity <- function(data) {
  # NOTE(review): the single-index subsetting below is kept exactly as written.
  # If the intent was to drop all-zero columns, `[, colSums(.) != 0]` may have
  # been meant — confirm before changing, since downstream code tests S.obs == 0.
  abundance <- otu_table(data) %>% .[colSums(.) !=0]
  abundance_t <- t(abundance)

  # The index names below double as output column names; do not rename them.
  S.obs <- rowSums(abundance_t != 0)                                # observed richness
  data_shannon <- vegan::diversity(abundance_t, index = "shannon")  # Shannon index
  data_hill <- exp(data_shannon)                                    # exp(Shannon)
  data_invsimpson <- vegan::diversity(abundance_t, index = "invsimpson")  # inverse Simpson
  data_evenness <- data_shannon / log(vegan::specnumber(abundance_t))     # Shannon / log(richness)
  # Dominance family of indices (all of them), computed on the untransposed table.
  data_dominance <- microbiome::dominance(abundance, index = "all", rank = 1, aggregate = TRUE)

  alpha_diversity <- cbind(S.obs, data_shannon, data_hill, data_invsimpson, data_evenness, data_dominance)

  # Full outer join on row names so samples missing on either side are kept.
  sample_data <- sample_data(data)
  merged <- merge(data.frame(sample_data), alpha_diversity, by = 0, all = TRUE)
  invisible(column_to_rownames(merged, var = "Row.names"))
}
# Replace the sample metadata of each phyloseq object with the metadata plus
# alpha-diversity indices. The relative-abundance object receives the indices
# computed from the count object; the pathway object uses its own table.
sample_data(phyloseq[["phyloseq_rel"]]) <-
  sample_data(alpha_diversity(phyloseq[["phyloseq_count"]]))
sample_data(phyloseq[["phyloseq_count"]]) <-
  sample_data(alpha_diversity(phyloseq[["phyloseq_count"]]))
sample_data(phyloseq[["phyloseq_path_rpkm"]]) <-
  sample_data(alpha_diversity(phyloseq[["phyloseq_path_rpkm"]]))
A1. Host DNA, bacterial DNA by sample type and treatment
qPCR and sequencing results
qPCR result
# Figure 2: qPCR results by sample type, boxes filled by treatment.
# Layers common to all four panels; theme_classic() must precede theme() so the
# markdown axis title and tag size are not overwritten.
# Fill colours: https://colorbrewer2.org/#type=qualitative&scheme=Set1&n=6
f2_shared_layers <- list(
  geom_boxplot(aes(fill = treatment), lwd = 0.2),
  scale_fill_manual(values = c("#e31a1c", "#fb9a99", "#33a02c", "#b2df8a", "#1f78b4", "#a6cee3")),
  xlab("Sample type"),
  theme_classic(base_size = 12, base_family = "serif"),
  theme(plot.tag = element_text(size = 15), axis.title.y = element_markdown()),
  guides(fill = guide_legend(nrow = 1, title = "Treatment"))
)

# 2A: total DNA (host + bacterial), log10 scale
f2a <- ggplot(sample_data, aes(x = sample_type, y = log10(DNA_host_nondil + DNA_bac_nondil))) +
  f2_shared_layers +
  ylab("log<sub>10</sub>(qPCR total DNA)<br>(ng/μL)") +
  labs(tag = "A")

# 2B: host DNA, log10 scale
f2b <- ggplot(sample_data, aes(x = sample_type, y = log10(DNA_host_nondil))) +
  f2_shared_layers +
  ylab("log<sub>10</sub>(qPCR host DNA)<br>(ng/μL)") +
  labs(tag = "B")

# 2C: bacterial DNA, log10 scale
f2c <- ggplot(sample_data, aes(x = sample_type, y = log10(DNA_bac_nondil))) +
  f2_shared_layers +
  ylab("log<sub>10</sub>(qPCR bacterial DNA)<br>(ng/μL)") +
  labs(tag = "C")

# 2D: host DNA ratio
f2d <- ggplot(sample_data, aes(x = sample_type, y = host_proportion)) +
  f2_shared_layers +
  ylab("Host DNA ratio") +
  labs(tag = "D")

# Combined figure with one shared legend (output for markdown).
ggarrange(f2a, f2b, f2c, f2d, common.legend = TRUE, align = "hv")
Figure 2. qPCR result of host depletion study. A. Total DNA B. Host DNA C. Bacterial DNA D. Host %
Sequencing result
# Figure 3: sequencing results by sample type, boxes filled by treatment.
# Layers common to all four panels. Changes versus the previous per-panel code:
#   * legend label typo "QIAaamp" corrected to "QIAamp";
#   * every panel now uses element_text() for the tag (panel C alone used
#     element_markdown()), so the four tags are drawn the same way.
# `labels` is positional: it relies on the treatment levels being ordered
# Untreated, lyPMA, Benzonase, Host zero, Molysis, QIAamp — TODO confirm.
# Fill colours: https://colorbrewer2.org/#type=qualitative&scheme=Set1&n=6
f3_shared_layers <- list(
  geom_boxplot(aes(fill = treatment), lwd = 0.2),
  theme_classic(base_size = 12, base_family = "serif"),
  scale_fill_manual(
    values = c("#e31a1c", "#fb9a99", "#33a02c", "#b2df8a", "#1f78b4", "#a6cee3"),
    name = "Treatment",
    labels = c("Untreated", "lyPMA", "Benzonase", "Host zero", "Molysis", "QIAamp")
  ),
  scale_x_discrete(name = "Sample type"),
  theme(axis.title.y = element_markdown(),
        plot.tag = element_text(size = 15)),
  guides(fill = guide_legend(nrow = 1))
)

# 3A: raw reads, log10 scale
f3a <- ggplot(sample_data, aes(x = sample_type, y = log10(Raw_reads))) +
  f3_shared_layers +
  ylab("log<sub>10</sub>(raw reads)") +
  labs(tag = "A")

# 3B: host-mapped reads, log10 scale
f3b <- ggplot(sample_data, aes(x = sample_type, y = log10(Host_mapped))) +
  f3_shared_layers +
  ylab("log<sub>10</sub>(host reads)") +
  labs(tag = "B")

# 3C: final reads, log10 scale
f3c <- ggplot(sample_data, aes(x = sample_type, y = log10(Final_reads))) +
  f3_shared_layers +
  ylab("log<sub>10</sub>(final reads)") +
  labs(tag = "C")

# 3D: host ratio by sequencing (Host_mapped/Raw_reads was used in prior papers)
f3d <- ggplot(sample_data, aes(x = sample_type, y = sequencing_host_prop)) +
  f3_shared_layers +
  ylab("Host ratio by sequencing") +
  labs(tag = "D")

# Combined figure with one shared legend.
ggarrange(f3a, f3b, f3c, f3d, common.legend = TRUE, align = "hv")
Figure 3. Sequencing result of host depletion study. A. Raw reads B. Host-mapped reads C. Final reads D. Host ratio by sequencing
Results A1
1.1. Some changes were observed, for both host DNA and bacterial DNA.
1.2. Sequencing results need to be added
This will be Fig 2. of the manuscript, after removing positives and negatives
A2. Modeling on sequencing results
As some changes were observed after treatment, linear mixed effect models were employed for testing.
Test results
Library failure - ANOVA
Some samples failed in library prep. What type of sample were fragile to treatments?
glm ( library fail ~ sample_type + treatment + sample_type * treatment + subject_id )
# Wald chi-square table (car::Anova defaults) for library-prep failure, with a
# random intercept per subject.
#
# The model was previously fitted with glmer() without a `family`, which lme4
# treats as a deprecated shortcut to lmer() (Gaussian, identity link). Calling
# lme4::lmer() directly fits the same model without relying on that shortcut.
# NOTE(review): `lib_failed` looks like a binary outcome; a binomial GLMM
# (glmer(..., family = binomial)) may be the intended model — confirm. It would
# change the estimates and the table columns, so it is not applied here.
lme4::lmer(lib_failed ~ sample_type + treatment + sample_type * treatment + (1|subject_id),
           data = sample_data %>% data.frame) %>%
  Anova %>%
  data.frame(check.names = FALSE) %>%
  # Star terms with p < 0.05.
  mutate(` ` = case_when(abs(`Pr(>Chisq)`) < 0.05 ~ "*", .default = " ")) %>%
  # Show interaction terms as "a * b" instead of "a:b".
  rownames_to_column(var = "x") %>%
  mutate(x = gsub(":", " * ", x)) %>%
  column_to_rownames(var = "x") %>%
  kbl(format = "html") %>%
  kable_styling(full_width = FALSE, html_font = "serif")
| Chisq | Df | Pr(>Chisq) | ||
|---|---|---|---|---|
| sample_type | 20.19828 | 4 | 0.0004563 |
|
| treatment | 41.67568 | 5 | 0.0000001 |
|
| sample_type * treatment | 92.13915 | 20 | 0.0000000 |
|
Library failure
glm ( sequencing fail ~ sample_type + treatment + sample_type * treatment + subject_id )
Nasals were fragile to lyPMA and Molysis
# Fixed-effect coefficient table for library-prep failure, with a random
# intercept per subject.
#
# The model was previously fitted with glmer() without a `family`, which lme4
# treats as a deprecated shortcut to lmer() (Gaussian, identity link). Calling
# lme4::lmer() directly fits the same model without relying on that shortcut.
# NOTE(review): `lib_failed` looks like a binary outcome; a binomial GLMM
# (glmer(..., family = binomial)) may be the intended model — confirm. It would
# replace the `t value` column used below, so it is not applied here.
lme4::lmer(lib_failed ~ sample_type + treatment + sample_type * treatment + (1|subject_id),
           data = sample_data %>% data.frame) %>%
  summary %>%
  .$coefficients %>%
  data.frame(check.names = FALSE) %>%
  # No p-values in this summary: star coefficients with |t| > 2.
  mutate(` ` = case_when(abs(`t value`) > 2 ~ "*", .default = " ")) %>%
  # Strip the factor-name prefixes and show interactions as "a * b".
  rownames_to_column(var = "x") %>%
  mutate(x = gsub("sample_type|treatment", "", x)) %>%
  mutate(x = gsub(":", " * ", x)) %>%
  column_to_rownames(var = "x") %>%
  kbl(format = "html") %>%
  kable_styling(full_width = FALSE, html_font = "serif")
| Estimate | Std. Error | t value | ||
|---|---|---|---|---|
| (Intercept) | 0.0000000 | 0.1317706 | 0.0000000 | |
| Mock | 0.0000000 | 0.1863518 | 0.0000000 | |
| BAL | 0.0000000 | 0.1630767 | 0.0000000 | |
| Nasal | 0.0000000 | 0.1482523 | 0.0000000 | |
| Sputum | 0.0000000 | 0.1630767 | 0.0000000 | |
| lyPMA | 0.0000000 | 0.1125488 | 0.0000000 | |
| Benzonase | 0.0000000 | 0.1125488 | 0.0000000 | |
| Host zero | 0.0000000 | 0.1125488 | 0.0000000 | |
| Molysis | 0.0000000 | 0.1125488 | 0.0000000 | |
| QIAamp | 0.0000000 | 0.1125488 | 0.0000000 | |
| Mock * lyPMA | 0.0000000 | 0.1591681 | 0.0000000 | |
| BAL * lyPMA | 0.2000000 | 0.1627453 | 1.2289140 | |
| Nasal * lyPMA | 0.8187152 | 0.1541029 | 5.3127843 |
|
| Sputum * lyPMA | 0.0000000 | 0.1627453 | 0.0000000 | |
| Mock * Benzonase | 0.0000000 | 0.1591681 | 0.0000000 | |
| BAL * Benzonase | 0.0000000 | 0.1627453 | 0.0000000 | |
| Nasal * Benzonase | -0.0424433 | 0.1542275 | -0.2751992 | |
| Sputum * Benzonase | 0.0000000 | 0.1627453 | 0.0000000 | |
| Mock * Host zero | 0.0000000 | 0.1591681 | 0.0000000 | |
| BAL * Host zero | 0.2000000 | 0.1627453 | 1.2289140 | |
| Nasal * Host zero | 0.3575567 | 0.1542275 | 2.3183715 |
|
| Sputum * Host zero | 0.0000000 | 0.1627453 | 0.0000000 | |
| Mock * Molysis | 0.0000000 | 0.1591681 | 0.0000000 | |
| BAL * Molysis | 0.2000000 | 0.1627453 | 1.2289140 | |
| Nasal * Molysis | 0.7812848 | 0.1541029 | 5.0698914 |
|
| Sputum * Molysis | 0.0000000 | 0.1627453 | 0.0000000 | |
| Mock * QIAamp | 0.0000000 | 0.1591681 | 0.0000000 | |
| BAL * QIAamp | 0.0000000 | 0.1627453 | 0.0000000 | |
| Nasal * QIAamp | 0.0424433 | 0.1542275 | 0.2751992 | |
| Sputum * QIAamp | 0.0000000 | 0.1627453 | 0.0000000 |
Sequencing failure
Modeling of sequencing failure were not available due to low number of cases.
BAL079 - control & lyPMA failed sequencing.
# Fixed-effect coefficient table for sequencing failure, defined here as a
# sample with no observed species (S.obs == 0), with a random intercept per
# subject.
#
# The model was previously fitted with glmer() without a `family`, which lme4
# treats as a deprecated shortcut to lmer() (Gaussian, identity link). Calling
# lme4::lmer() directly fits the same model without relying on that shortcut.
# NOTE(review): the outcome is logical; a binomial GLMM may be the intended
# model — confirm. It would replace the `t value` column used below.
sample_data(phyloseq$phyloseq_count) %>%
  data.frame %>%
  mutate(sequencing_fail = (S.obs == 0)) %>%
  lme4::lmer(sequencing_fail ~ sample_type + treatment + sample_type * treatment + (1|subject_id),
             data = .) %>%
  summary %>%
  .$coefficients %>%
  data.frame(check.names = FALSE) %>%
  # No p-values in this summary: star coefficients with |t| > 2.
  mutate(` ` = case_when(abs(`t value`) > 2 ~ "*", .default = " ")) %>%
  # Strip the factor-name prefixes and show interactions as "a * b".
  rownames_to_column(var = "x") %>%
  mutate(x = gsub("sample_type|treatment", "", x)) %>%
  mutate(x = gsub(":", " * ", x)) %>%
  column_to_rownames(var = "x") %>%
  kbl(format = "html") %>%
  kable_styling(full_width = FALSE, html_font = "serif")
| Estimate | Std. Error | t value | ||
|---|---|---|---|---|
| (Intercept) | 0.0 | 0.0765611 | 0.000000 | |
| Mock | 0.0 | 0.1082738 | 0.000000 | |
| BAL | 0.2 | 0.0928632 | 2.153706 |
|
| Nasal | 0.0 | 0.0851034 | 0.000000 | |
| Sputum | 0.0 | 0.0928632 | 0.000000 | |
| lyPMA | 0.0 | 0.0591372 | 0.000000 | |
| Benzonase | 0.0 | 0.0591372 | 0.000000 | |
| Host zero | 0.0 | 0.0591372 | 0.000000 | |
| Molysis | 0.0 | 0.0591372 | 0.000000 | |
| QIAamp | 0.0 | 0.0591372 | 0.000000 | |
| Mock * lyPMA | 0.0 | 0.0836326 | 0.000000 | |
| BAL * lyPMA | 0.0 | 0.0855122 | 0.000000 | |
| Nasal * lyPMA | 0.0 | 0.0811995 | 0.000000 | |
| Sputum * lyPMA | 0.0 | 0.0855122 | 0.000000 | |
| Mock * Benzonase | 0.0 | 0.0836326 | 0.000000 | |
| BAL * Benzonase | -0.2 | 0.0855122 | -2.338848 |
|
| Nasal * Benzonase | 0.0 | 0.0812882 | 0.000000 | |
| Sputum * Benzonase | 0.0 | 0.0855122 | 0.000000 | |
| Mock * Host zero | 0.0 | 0.0836326 | 0.000000 | |
| BAL * Host zero | -0.2 | 0.0855122 | -2.338848 |
|
| Nasal * Host zero | 0.0 | 0.0812882 | 0.000000 | |
| Sputum * Host zero | 0.0 | 0.0855122 | 0.000000 | |
| Mock * Molysis | 0.0 | 0.0836326 | 0.000000 | |
| BAL * Molysis | -0.2 | 0.0855122 | -2.338848 |
|
| Nasal * Molysis | 0.0 | 0.0811995 | 0.000000 | |
| Sputum * Molysis | 0.0 | 0.0855122 | 0.000000 | |
| Mock * QIAamp | 0.0 | 0.0836326 | 0.000000 | |
| BAL * QIAamp | -0.2 | 0.0855122 | -2.338848 |
|
| Nasal * QIAamp | 0.0 | 0.0812882 | 0.000000 | |
| Sputum * QIAamp | 0.0 | 0.0855122 | 0.000000 |
log10(Final reads) - ANOVA
Which methods was effective in increasing the final reads?
Interaction term was significant
# ANOVA table for log10(final reads): sample type, treatment and their
# interaction, with a random intercept per subject.
lmer(
  log10(Final_reads) ~ sample_type + treatment + sample_type * treatment + (1|subject_id),
  data = data.frame(sample_data)
) %>%
  anova() %>%
  data.frame(check.names = FALSE) %>%
  # Star terms with p < 0.05.
  mutate(` ` = case_when(abs(`Pr(>F)`) < 0.05 ~ "*", .default = " ")) %>%
  # Show interaction terms as "a * b" instead of "a:b".
  `rownames<-`(gsub(":", " * ", rownames(.))) %>%
  kbl(format = "html") %>%
  kable_styling(full_width = FALSE, html_font = "serif")
| Sum Sq | Mean Sq | NumDF | DenDF | F value | Pr(>F) | ||
|---|---|---|---|---|---|---|---|
| sample_type | 7.219703 | 1.8049257 | 4 | 10.20498 | 11.573353 | 0.0008358 |
|
| treatment | 16.897219 | 3.3794439 | 5 | 113.82452 | 21.669311 | 0.0000000 |
|
| sample_type * treatment | 14.273189 | 0.7136594 | 20 | 111.51262 | 4.576051 | 0.0000001 |
|
log10(Final reads)
Which methods was effective in increasing the final reads?
lmer( log10(Final reads) vs sample_type + treatment + sample_type * treatment + subject_id )
Except lyPMA, every methods increased final reads
# Fixed-effect coefficient table for log10(final reads), with a random
# intercept per subject.
lmer(
  log10(Final_reads) ~ sample_type + treatment + sample_type * treatment + (1|subject_id),
  data = data.frame(sample_data)
) %>%
  summary() %>%
  .$coefficients %>%
  data.frame(check.names = FALSE) %>%
  # Star coefficients with p < 0.05.
  mutate(` ` = case_when(abs(`Pr(>|t|)`) < 0.05 ~ "*", .default = " ")) %>%
  # Strip the factor-name prefixes and show interactions as "a * b".
  `rownames<-`(gsub(":", " * ", gsub("sample_type|treatment", "", rownames(.)))) %>%
  kbl(format = "html") %>%
  kable_styling(full_width = FALSE, html_font = "serif")
| Estimate | Std. Error | df | t value | Pr(>|t|) | ||
|---|---|---|---|---|---|---|
| (Intercept) | 5.8969160 | 0.2896787 | 13.69170 | 20.3567474 | 0.0000000 |
|
| Mock | 2.0643827 | 0.4096675 | 13.69170 | 5.0391660 | 0.0001936 |
|
| BAL | -0.4036097 | 0.3559340 | 21.62209 | -1.1339451 | 0.2692339 | |
| Nasal | 0.6201093 | 0.3245018 | 17.83245 | 1.9109582 | 0.0722216 | |
| Sputum | -0.0703197 | 0.3559340 | 21.62209 | -0.1975637 | 0.8452319 | |
| lyPMA | 0.3624825 | 0.2391309 | 110.63626 | 1.5158327 | 0.1324137 | |
| Benzonase | 0.0678140 | 0.2391309 | 110.63626 | 0.2835852 | 0.7772583 | |
| Host zero | 0.2506613 | 0.2391309 | 110.63626 | 1.0482178 | 0.2968228 | |
| Molysis | 0.1413267 | 0.2391309 | 110.63626 | 0.5910015 | 0.5557249 | |
| QIAamp | 0.1588315 | 0.2391309 | 110.63626 | 0.6642031 | 0.5079422 | |
| Mock * lyPMA | -1.1298459 | 0.3381822 | 110.63626 | -3.3409380 | 0.0011392 |
|
| BAL * lyPMA | -0.0103440 | 0.3457828 | 110.63626 | -0.0299146 | 0.9761890 | |
| Nasal * lyPMA | -0.9035047 | 0.3277361 | 116.97484 | -2.7568060 | 0.0067736 |
|
| Sputum * lyPMA | 0.1775924 | 0.3457828 | 110.63626 | 0.5135953 | 0.6085591 | |
| Mock * Benzonase | -0.0827988 | 0.3381822 | 110.63626 | -0.2448348 | 0.8070379 | |
| BAL * Benzonase | 0.7430909 | 0.3457828 | 110.63626 | 2.1490108 | 0.0338131 |
|
| Nasal * Benzonase | 0.0750795 | 0.3280318 | 117.54161 | 0.2288786 | 0.8193610 | |
| Sputum * Benzonase | 0.7780564 | 0.3457828 | 110.63626 | 2.2501306 | 0.0264186 |
|
| Mock * Host zero | -0.2212240 | 0.3381822 | 110.63626 | -0.6541561 | 0.5143687 | |
| BAL * Host zero | 0.6995291 | 0.3457828 | 110.63626 | 2.0230305 | 0.0454812 |
|
| Nasal * Host zero | 0.6045330 | 0.3280318 | 117.54161 | 1.8429094 | 0.0678619 | |
| Sputum * Host zero | 1.4226694 | 0.3457828 | 110.63626 | 4.1143443 | 0.0000750 |
|
| Mock * Molysis | -0.1604345 | 0.3381822 | 110.63626 | -0.4744026 | 0.6361479 | |
| BAL * Molysis | 0.8944771 | 0.3457828 | 110.63626 | 2.5868180 | 0.0109839 |
|
| Nasal * Molysis | 0.0690814 | 0.3277361 | 116.97484 | 0.2107838 | 0.8334227 | |
| Sputum * Molysis | 1.8487779 | 0.3457828 | 110.63626 | 5.3466454 | 0.0000005 |
|
| Mock * QIAamp | -0.0570843 | 0.3381822 | 110.63626 | -0.1687973 | 0.8662643 | |
| BAL * QIAamp | 0.8892670 | 0.3457828 | 110.63626 | 2.5717504 | 0.0114447 |
|
| Nasal * QIAamp | 0.8921752 | 0.3280318 | 117.54161 | 2.7197824 | 0.0075237 |
|
| Sputum * QIAamp | 1.2569571 | 0.3457828 | 110.63626 | 3.6351063 | 0.0004232 |
|
Host ratio ANOVA
Which methods was effective in lowering host %
Interaction term was significant
# ANOVA table for the sequencing-based host ratio: sample type, treatment and
# their interaction, with a random intercept per subject.
lmer(
  sequencing_host_prop ~ sample_type + treatment + sample_type * treatment + (1|subject_id),
  data = data.frame(sample_data)
) %>%
  anova() %>%
  data.frame(check.names = FALSE) %>%
  # Star terms with p < 0.05.
  mutate(` ` = case_when(abs(`Pr(>F)`) < 0.05 ~ "*", .default = " ")) %>%
  # Show interaction terms as "a * b" instead of "a:b".
  `rownames<-`(gsub(":", " * ", rownames(.))) %>%
  kbl(format = "html") %>%
  kable_styling(full_width = FALSE, html_font = "serif")
| Sum Sq | Mean Sq | NumDF | DenDF | F value | Pr(>F) | ||
|---|---|---|---|---|---|---|---|
| sample_type | 1.140989 | 0.2852472 | 4 | 12.90241 | 19.94979 | 1.96e-05 |
|
| treatment | 1.844330 | 0.3688660 | 5 | 112.48557 | 25.79798 | 0.00e+00 |
|
| sample_type * treatment | 3.013480 | 0.1506740 | 20 | 111.00500 | 10.53793 | 0.00e+00 |
|
Host ratio
Which methods was effective in lowering host %
lmer( Host DNA ratio vs sample_type + treatment + sample_type * treatment + (1|subject_id) )
Host zero was effective for all sample types. Molysis was effective for nasal and sputum samples. QIAamp was effective for nasal samples only.
# Fixed-effect coefficient table for the sequencing-based host ratio, with a
# random intercept per subject.
lmer(
  sequencing_host_prop ~ sample_type + treatment + sample_type * treatment + (1|subject_id),
  data = data.frame(sample_data)
) %>%
  summary() %>%
  .$coefficients %>%
  data.frame(check.names = FALSE) %>%
  # Star coefficients with p < 0.05.
  mutate(` ` = case_when(abs(`Pr(>|t|)`) < 0.05 ~ "*", .default = " ")) %>%
  # Strip the factor-name prefixes and show interactions as "a * b".
  `rownames<-`(gsub(":", " * ", gsub("sample_type|treatment", "", rownames(.)))) %>%
  kbl(format = "html") %>%
  kable_styling(full_width = FALSE, html_font = "serif")
| Estimate | Std. Error | df | t value | Pr(>|t|) | ||
|---|---|---|---|---|---|---|
| (Intercept) | 0.1580489 | 0.1236466 | 14.44554 | 1.2782307 | 0.2213255 | |
| Mock | -0.1555157 | 0.1748628 | 14.44554 | -0.8893586 | 0.3883812 | |
| BAL | 0.8383419 | 0.1439765 | 18.45998 | 5.8227692 | 0.0000147 |
|
| Nasal | 0.7942119 | 0.1341971 | 16.58294 | 5.9182492 | 0.0000187 |
|
| Sputum | 0.8317866 | 0.1439765 | 18.45998 | 5.7772389 | 0.0000162 |
|
| lyPMA | -0.0125376 | 0.0724064 | 109.71034 | -0.1731564 | 0.8628476 | |
| Benzonase | 0.0820089 | 0.0724064 | 109.71034 | 1.1326192 | 0.2598435 | |
| Host zero | 0.0513961 | 0.0724064 | 109.71034 | 0.7098273 | 0.4793172 | |
| Molysis | 0.0340392 | 0.0724064 | 109.71034 | 0.4701129 | 0.6392075 | |
| QIAamp | 0.0122709 | 0.0724064 | 109.71034 | 0.1694722 | 0.8657373 | |
| Mock * lyPMA | 0.0665440 | 0.1023982 | 109.71034 | 0.6498555 | 0.5171439 | |
| BAL * lyPMA | -0.0186970 | 0.1046995 | 109.71034 | -0.1785781 | 0.8585983 | |
| Nasal * lyPMA | -0.2655229 | 0.1001256 | 114.99868 | -2.6518985 | 0.0091339 |
|
| Sputum * lyPMA | -0.0255115 | 0.1046995 | 109.71034 | -0.2436637 | 0.8079463 | |
| Mock * Benzonase | -0.0782140 | 0.1023982 | 109.71034 | -0.7638225 | 0.4466126 | |
| BAL * Benzonase | -0.0934606 | 0.1046995 | 109.71034 | -0.8926556 | 0.3739954 | |
| Nasal * Benzonase | -0.2844798 | 0.1003189 | 115.63457 | -2.8357539 | 0.0053985 |
|
| Sputum * Benzonase | -0.1445493 | 0.1046995 | 109.71034 | -1.3806103 | 0.1702051 | |
| Mock * Host zero | -0.0466232 | 0.1023982 | 109.71034 | -0.4553132 | 0.6497838 | |
| BAL * Host zero | -0.2339486 | 0.1046995 | 109.71034 | -2.2344755 | 0.0274786 |
|
| Nasal * Host zero | -0.7898081 | 0.1003189 | 115.63457 | -7.8729711 | 0.0000000 |
|
| Sputum * Host zero | -0.5061958 | 0.1046995 | 109.71034 | -4.8347467 | 0.0000044 |
|
| Mock * Molysis | -0.0301512 | 0.1023982 | 109.71034 | -0.2944502 | 0.7689703 | |
| BAL * Molysis | -0.2110097 | 0.1046995 | 109.71034 | -2.0153834 | 0.0463093 |
|
| Nasal * Molysis | -0.5388071 | 0.1001256 | 114.99868 | -5.3813128 | 0.0000004 |
|
| Sputum * Molysis | -0.7303645 | 0.1046995 | 109.71034 | -6.9758137 | 0.0000000 |
|
| Mock * QIAamp | -0.0085052 | 0.1023982 | 109.71034 | -0.0830597 | 0.9339554 | |
| BAL * QIAamp | -0.0749318 | 0.1046995 | 109.71034 | -0.7156840 | 0.4757069 | |
| Nasal * QIAamp | -0.7638065 | 0.1003189 | 115.63457 | -7.6137819 | 0.0000000 |
|
| Sputum * QIAamp | -0.1992210 | 0.1046995 | 109.71034 | -1.9027875 | 0.0596896 |
Gram negatives - ANOVA
Which methods changed the gram-stain ratio?
Square root transformation was required
Interaction term was significant
# Distribution of the gram-negative ratio before and after the square-root
# transformation used in the model below. Explicit titles and axis labels are
# given because hist() otherwise builds them from the deparsed argument
# expression (i.e. the pipeline code), unlike the other histograms in this file.
hist(sample_data %>% data.frame %>% .$gram_neg,
     xlab = "Gram-negative ratio",
     main = "Histogram of gram-negative ratio")
hist(sample_data %>% data.frame %>% .$gram_neg %>% sqrt(),
     xlab = "sqrt(Gram-negative ratio)",
     main = "Histogram of square-root gram-negative ratio")

# ANOVA table for sqrt(gram-negative ratio): sample type, treatment and their
# interaction, with a random intercept per subject.
lmer(sqrt(gram_neg) ~ sample_type + treatment + sample_type * treatment + (1|subject_id),
     data = sample_data %>% data.frame) %>%
  anova %>%
  data.frame(check.names = FALSE) %>%
  # Star terms with p < 0.05.
  mutate(` ` = case_when(abs(`Pr(>F)`) < 0.05 ~ "*", .default = " ")) %>%
  # Show interaction terms as "a * b" instead of "a:b".
  rownames_to_column(var = "x") %>%
  mutate(x = gsub(":", " * ", x)) %>%
  column_to_rownames(var = "x") %>%
  kbl(format = "html") %>%
  kable_styling(full_width = FALSE, html_font = "serif")
| Sum Sq | Mean Sq | NumDF | DenDF | F value | Pr(>F) | ||
|---|---|---|---|---|---|---|---|
| sample_type | 0.2519926 | 0.0629982 | 4 | 15.27912 | 3.171971 | 0.0440715 |
|
| treatment | 2.0092804 | 0.4018561 | 5 | 110.53768 | 20.233539 | 0.0000000 |
|
| sample_type * treatment | 3.6648456 | 0.1832423 | 20 | 109.68797 | 9.226288 | 0.0000000 |
|
Gram negatives
Which method biased gram positive-negative ratio
lmer( Gram-negative ratio vs sample_type + treatment + sample_type * treatment + (1|subject_id) )
Some treatment (commercial) changed gram negative proportion
# Fixed-effect coefficient table for sqrt(gram-negative ratio), with a random
# intercept per subject.
lmer(
  sqrt(gram_neg) ~ sample_type + treatment + sample_type * treatment + (1|subject_id),
  data = data.frame(sample_data)
) %>%
  summary() %>%
  .$coefficients %>%
  data.frame(check.names = FALSE) %>%
  # Star coefficients with p < 0.05.
  mutate(` ` = case_when(abs(`Pr(>|t|)`) < 0.05 ~ "*", .default = " ")) %>%
  # Strip the factor-name prefixes and show interactions as "a * b".
  `rownames<-`(gsub(":", " * ", gsub("sample_type|treatment", "", rownames(.)))) %>%
  kbl(format = "html") %>%
  kable_styling(full_width = FALSE, html_font = "serif")
| Estimate | Std. Error | df | t value | Pr(>|t|) | ||
|---|---|---|---|---|---|---|
| (Intercept) | 0.6047523 | 0.1991381 | 16.03891 | 3.0368496 | 0.0078347 |
|
| Mock | 0.1893684 | 0.2816237 | 16.03891 | 0.6724164 | 0.5108903 | |
| BAL | 0.0050293 | 0.2282735 | 19.19085 | 0.0220321 | 0.9826498 | |
| Nasal | -0.4881059 | 0.2127831 | 17.27815 | -2.2939131 | 0.0345859 |
|
| Sputum | 0.2582871 | 0.2256043 | 18.34006 | 1.1448676 | 0.2669856 | |
| lyPMA | -0.0837507 | 0.0853365 | 108.50402 | -0.9814169 | 0.3285704 | |
| Benzonase | -0.2799287 | 0.0853365 | 108.50402 | -3.2802909 | 0.0013947 |
|
| Host zero | 0.1510556 | 0.0853365 | 108.50402 | 1.7701162 | 0.0795165 | |
| Molysis | 0.1211041 | 0.0853365 | 108.50402 | 1.4191349 | 0.1587260 | |
| QIAamp | -0.0908688 | 0.0853365 | 108.50402 | -1.0648287 | 0.2893173 | |
| Mock * lyPMA | -0.0896218 | 0.1206841 | 108.50402 | -0.7426149 | 0.4593196 | |
| BAL * lyPMA | 0.1732270 | 0.1311974 | 108.50402 | 1.3203533 | 0.1894955 | |
| Nasal * lyPMA | 0.6106762 | 0.1187428 | 112.04041 | 5.1428470 | 0.0000012 |
|
| Sputum * lyPMA | -0.2343407 | 0.1233964 | 108.50402 | -1.8990876 | 0.0602088 | |
| Mock * Benzonase | -0.3638032 | 0.1206841 | 108.50402 | -3.0145078 | 0.0032051 |
|
| BAL * Benzonase | 0.3259099 | 0.1282113 | 108.88628 | 2.5419740 | 0.0124295 |
|
| Nasal * Benzonase | 0.3421773 | 0.1190735 | 112.52671 | 2.8736635 | 0.0048518 |
|
| Sputum * Benzonase | -0.1759531 | 0.1233964 | 108.50402 | -1.4259168 | 0.1567631 | |
| Mock * Host zero | -0.7891645 | 0.1206841 | 108.50402 | -6.5390923 | 0.0000000 |
|
| BAL * Host zero | -0.1973889 | 0.1282113 | 108.88628 | -1.5395587 | 0.1265691 | |
| Nasal * Host zero | -0.1470414 | 0.1190735 | 112.52671 | -1.2348791 | 0.2194478 | |
| Sputum * Host zero | -0.7742055 | 0.1233964 | 108.50402 | -6.2741316 | 0.0000000 |
|
| Mock * Molysis | -0.7308145 | 0.1206841 | 108.50402 | -6.0555990 | 0.0000000 |
|
| BAL * Molysis | -0.1665528 | 0.1282113 | 108.88628 | -1.2990492 | 0.1966711 | |
| Nasal * Molysis | -0.0101193 | 0.1187428 | 112.04041 | -0.0852206 | 0.9322381 | |
| Sputum * Molysis | -0.7649241 | 0.1233964 | 108.50402 | -6.1989155 | 0.0000000 |
|
| Mock * QIAamp | -0.5198421 | 0.1206841 | 108.50402 | -4.3074614 | 0.0000364 |
|
| BAL * QIAamp | 0.0754132 | 0.1282113 | 108.88628 | 0.5881948 | 0.5576196 | |
| Nasal * QIAamp | 0.1199762 | 0.1190735 | 112.52671 | 1.0075804 | 0.3158177 | |
| Sputum * QIAamp | -0.5568238 | 0.1233964 | 108.50402 | -4.5124785 | 0.0000163 |
|
Results
1. Library failure was associated with Nasal, especially after lyPMA and Molysis treatment
2. Benzonase, host-zero, Molysis, and QIAamp increased final reads
3. Host-zero lowered host %. For others, there were significant sample_type specific treatment efficiencies
A3. LM of taxa alpha diversity
Alpha diversity could be having changes due to treatment.
Both stratified and nonstratified analyses were conducted.
Figure - Alpha diversity
# Figure: alpha diversity by treatment, one facet per sample type.
# `sample_data` (the variable) is refreshed from the count object so that it
# carries the alpha-diversity columns; it is also used in the subset conditions.
sample_data <- sample_data(phyloseq$phyloseq_count)

# Layers common to both panels. The legend label typo "QIAaamp" is corrected to
# "QIAamp". `labels` is positional: it relies on the treatment levels being
# ordered Untreated, lyPMA, Benzonase, Host zero, Molysis, QIAamp — TODO confirm.
# Fill colours: https://colorbrewer2.org/#type=qualitative&scheme=Set1&n=6
f4_shared_layers <- list(
  geom_boxplot(aes(fill = treatment), lwd = 0.2),
  scale_fill_manual(
    values = c("#e31a1c", "#fb9a99", "#33a02c", "#b2df8a", "#1f78b4", "#a6cee3"),
    name = "Treatment",
    labels = c("Untreated", "lyPMA", "Benzonase", "Host zero", "Molysis", "QIAamp")
  ),
  theme_classic(base_size = 12, base_family = "serif"),
  # No x aesthetic is mapped, so the x axis text and ticks are hidden.
  theme(plot.tag = element_text(size = 15), axis.text.x = element_blank(), axis.ticks.x = element_blank()),
  facet_wrap(~sample_type, nrow = 1),
  guides(fill = guide_legend(nrow = 1))
)

# 4A: species richness
f4a <- ggplot(subset(sample_data(phyloseq$phyloseq_count), sample_data$sample_type %in% c("Sputum", "Nasal", "BAL", "Mock", "Neg.")), aes(y = S.obs)) +
  f4_shared_layers +
  ylab("Species richness") +
  labs(tag = "A")

# 4B: inverse Simpson index
f4b <- ggplot(subset(sample_data(phyloseq$phyloseq_count), sample_data$sample_type %in% c("Sputum", "Nasal", "BAL", "Mock", "Neg.")), aes(y = data_invsimpson)) +
  f4_shared_layers +
  ylab("Inverse simpson") +
  labs(tag = "B")

# Stack the two panels with one shared legend.
ggarrange(f4a, f4b, common.legend = TRUE, align = "hv", ncol = 1) # alpha diversity plots
Species richness
All samples:
S.obs ~ sample_type * treatment + log10 (Final_reads) + (1|original_sample)
Stratified:
S.obs ~ sample_type + log10 (Final_reads) + (1|original_sample)
Species richness (all samples & interaction term) - ANOVA
Interaction term was significant
# Sample metadata as a plain data frame, keeping only rows whose `simpson`
# value is not NaN. This data frame is the input to the mixed models below.
sample_data <- sample_data(phyloseq$phyloseq_count) %>%
  data.frame(check.names = FALSE) %>%
  subset(!is.nan(simpson))

# Species richness across all samples: sample type x treatment interaction,
# adjusted for log10 sequencing depth, random intercept per original_sample.
lmer_sob <- lmer(
  S.obs ~ sample_type * treatment + log10(Final_reads) + (1 | original_sample),
  data = sample_data
)

# ANOVA table of the fixed effects; the extra unnamed column flags p < 0.05.
lmer_sob %>%
  anova() %>%
  data.frame(check.names = FALSE) %>%
  mutate(
    ` ` = case_when(
      abs(`Pr(>F)`) < 0.05 ~ "*",
      .default = " "
    )
  ) %>%
  kbl(format = "html") %>%
  kable_styling(full_width = FALSE, html_font = "serif")
| Sum Sq | Mean Sq | NumDF | DenDF | F value | Pr(>F) | ||
|---|---|---|---|---|---|---|---|
| sample_type | 9960.852 | 2490.213 | 4 | 18.45264 | 33.245597 | 0.0000000 |
|
| treatment | 1528.150 | 305.630 | 5 | 30.68974 | 4.080314 | 0.0058869 |
|
| log10(Final_reads) | 3981.388 | 3981.388 | 1 | 114.44646 | 53.153537 | 0.0000000 |
|
| sample_type:treatment | 21131.669 | 1056.583 | 20 | 51.28486 | 14.105921 | 0.0000000 |
|
Species richness (all samples & interaction term)
The increase in sputum was seen with every treatment. Positive and negative controls showed no changes
# Fixed-effect coefficients for species richness across all samples.
# NOTE(review): this fit uses a random intercept per subject_id, whereas
# lmer_sob (the ANOVA table) uses original_sample - confirm which is intended.
lmer(
  S.obs ~ sample_type * treatment + log10(Final_reads) + (1 | subject_id),
  data = sample_data
) %>%
  summary() %>%
  coef() %>%
  data.frame(check.names = FALSE) %>%
  # Flag coefficients with p < 0.05 in an unnamed column.
  mutate(
    ` ` = case_when(
      abs(`Pr(>|t|)`) < 0.05 ~ "*",
      .default = " "
    )
  ) %>%
  # Shorten row labels: drop the variable-name prefixes, show ":" as " * ".
  rownames_to_column(var = "term") %>%
  mutate(term = gsub(":", " * ", gsub("treatment|sample_type", "", term))) %>%
  column_to_rownames(var = "term") %>%
  kbl(format = "html") %>%
  kable_styling(full_width = FALSE, html_font = "serif")
| Estimate | Std. Error | df | t value | Pr(>|t|) | ||
|---|---|---|---|---|---|---|
| (Intercept) | -80.047738 | 17.008298 | 53.43860 | -4.7063933 | 0.0000183 |
|
| Mock | 1.734738 | 17.593398 | 18.71396 | 0.0986016 | 0.9225031 | |
| BAL | 4.860057 | 13.888167 | 20.21759 | 0.3499423 | 0.7300011 | |
| Nasal | -6.274752 | 12.973679 | 18.37580 | -0.4836525 | 0.6343457 | |
| Sputum | 9.214633 | 13.693099 | 19.16482 | 0.6729399 | 0.5090194 | |
| lyPMA | -2.635369 | 5.253967 | 108.47693 | -0.5015961 | 0.6169682 | |
| Benzonase | -2.043958 | 5.204004 | 108.30975 | -0.3927664 | 0.6952640 | |
| Host zero | -3.168911 | 5.227009 | 108.38739 | -0.6062569 | 0.5456118 | |
| Molysis | -2.139510 | 5.210088 | 108.33039 | -0.4106476 | 0.6821421 | |
| QIAamp | -1.000382 | 5.212165 | 108.33742 | -0.1919321 | 0.8481547 | |
| log10(Final_reads) | 14.902887 | 2.030006 | 116.27333 | 7.3413036 | 0.0000000 |
|
| Mock * lyPMA | 12.737965 | 7.706230 | 109.08208 | 1.6529438 | 0.1012177 | |
| BAL * lyPMA | -1.349276 | 7.998293 | 108.30443 | -0.1686955 | 0.8663508 | |
| Nasal * lyPMA | 5.285949 | 7.471377 | 112.12777 | 0.7074932 | 0.4807279 | |
| Sputum * lyPMA | 32.986694 | 7.530978 | 108.32384 | 4.3801343 | 0.0000275 |
|
| Mock * Benzonase | -13.666059 | 7.358917 | 108.30817 | -1.8570748 | 0.0660176 | |
| BAL * Benzonase | -4.719308 | 7.985511 | 109.21176 | -0.5909838 | 0.5557524 | |
| Nasal * Benzonase | -1.690600 | 7.260902 | 112.19270 | -0.2328361 | 0.8163129 | |
| Sputum * Benzonase | 57.838047 | 7.686374 | 108.67609 | 7.5247507 | 0.0000000 |
|
| Mock * Host zero | -10.603124 | 7.370691 | 108.33641 | -1.4385522 | 0.1531601 | |
| BAL * Host zero | -1.270111 | 7.967847 | 109.16695 | -0.1594045 | 0.8736447 | |
| Nasal * Host zero | 3.419015 | 7.369171 | 112.51396 | 0.4639620 | 0.6435714 | |
| Sputum * Host zero | 89.231452 | 8.057689 | 109.42878 | 11.0740750 | 0.0000000 |
|
| Mock * Molysis | -9.509063 | 7.364202 | 108.32087 | -1.2912549 | 0.1993641 | |
| BAL * Molysis | 10.224601 | 8.054150 | 109.37844 | 1.2694823 | 0.2069635 | |
| Nasal * Molysis | 4.816033 | 7.239822 | 111.70287 | 0.6652143 | 0.5072846 | |
| Sputum * Molysis | 95.881206 | 8.406598 | 110.03853 | 11.4054712 | 0.0000000 |
|
| Mock * QIAamp | -15.049280 | 7.357910 | 108.30575 | -2.0453200 | 0.0432470 |
|
| BAL * QIAamp | -4.097753 | 8.051603 | 109.37244 | -0.5089364 | 0.6118220 | |
| Nasal * QIAamp | -7.457621 | 7.469458 | 112.08778 | -0.9984152 | 0.3202286 | |
| Sputum * QIAamp | 65.901043 | 7.943329 | 109.20933 | 8.2964015 | 0.0000000 |
|
Species richness - stratified (Pos + Neg)
No treatment increased species richness after adjusting for sequencing depth. In the mock community, all treatments except lyPMA even appeared to reduce possible contaminants. Need to observe alpha diversity of positive controls
# Species richness in the controls only (Neg. and Mock): ordinary linear model
# (no random effect) with the sample type x treatment interaction and log10
# sequencing depth.
lm(
  S.obs ~ sample_type * treatment + log10(Final_reads),
  data = subset(sample_data, sample_type %in% c("Neg.", "Mock"))
) %>%
  summary() %>%
  coef() %>%
  data.frame(check.names = FALSE) %>%
  # Flag coefficients with p < 0.05 in an unnamed column.
  mutate(
    ` ` = case_when(
      abs(`Pr(>|t|)`) < 0.05 ~ "*",
      .default = " "
    )
  ) %>%
  # Shorten row labels: drop the variable-name prefixes, show ":" as " * ".
  rownames_to_column(var = "term") %>%
  mutate(term = gsub(":", " * ", gsub("treatment|sample_type", "", term))) %>%
  column_to_rownames(var = "term") %>%
  kbl(format = "html") %>%
  kable_styling(full_width = FALSE, html_font = "serif")
| Estimate | Std. Error | t value | Pr(>|t|) | ||
|---|---|---|---|---|---|
| (Intercept) | -77.260345 | 7.499432 | -10.3021600 | 0.0000000 |
|
| Mock | 2.710544 | 3.274859 | 0.8276826 | 0.4118633 | |
| lyPMA | -2.464028 | 2.166841 | -1.1371523 | 0.2610050 | |
| Benzonase | -2.011903 | 2.120747 | -0.9486763 | 0.3474409 | |
| Host zero | -3.050426 | 2.142040 | -1.4240754 | 0.1607621 | |
| Molysis | -2.072706 | 2.126390 | -0.9747538 | 0.3344702 | |
| QIAamp | -0.925304 | 2.128315 | -0.4347590 | 0.6656461 | |
| log10(Final_reads) | 14.430200 | 1.248465 | 11.5583574 | 0.0000000 |
|
| Mock * lyPMA | 12.203902 | 3.312177 | 3.6845556 | 0.0005722 |
|
| Mock * Benzonase | -13.705197 | 2.998581 | -4.5705604 | 0.0000331 |
|
| Mock * Host zero | -10.707694 | 3.009499 | -3.5579653 | 0.0008412 |
|
| Mock * Molysis | -9.584898 | 3.003485 | -3.1912586 | 0.0024731 |
|
| Mock * QIAamp | -15.076263 | 2.997646 | -5.0293668 | 0.0000070 |
|
Species richness - stratified (NS + Pos + Neg)
Molysis and host zero may have increased species richness of Nasal. Data include nasal swab, positive depletion, and negative depletion
# Species richness for nasal swabs together with the Mock and Neg. controls.
# NOTE(review): random intercept is subject_id here, while the model stated in
# the text uses (1|original_sample) - confirm which grouping is intended.
lmer(
  S.obs ~ sample_type * treatment + log10(Final_reads) + (1 | subject_id),
  data = subset(sample_data, sample_type %in% c("Nasal", "Mock", "Neg."))
) %>%
  summary() %>%
  coef() %>%
  data.frame(check.names = FALSE) %>%
  # Flag coefficients with p < 0.05 in an unnamed column.
  mutate(
    ` ` = case_when(
      abs(`Pr(>|t|)`) < 0.05 ~ "*",
      .default = " "
    )
  ) %>%
  # Shorten row labels: drop the variable-name prefixes, show ":" as " * ".
  rownames_to_column(var = "term") %>%
  mutate(term = gsub(":", " * ", gsub("treatment|sample_type", "", term))) %>%
  column_to_rownames(var = "term") %>%
  kbl(format = "html") %>%
  kable_styling(full_width = FALSE, html_font = "serif")
| Estimate | Std. Error | df | t value | Pr(>|t|) | ||
|---|---|---|---|---|---|---|
| (Intercept) | -61.5968651 | 7.141345 | 45.200273 | -8.6253873 | 0.0000000 |
|
| Mock | 8.1939894 | 5.861474 | 8.818737 | 1.3979400 | 0.1962941 | |
| Nasal | -4.3344907 | 4.238467 | 8.089634 | -1.0226553 | 0.3360766 | |
| lyPMA | -1.5011967 | 2.229134 | 69.324661 | -0.6734439 | 0.5029040 | |
| Benzonase | -1.8317743 | 2.199470 | 69.068228 | -0.8328252 | 0.4078140 | |
| Host zero | -2.3846158 | 2.213143 | 69.187994 | -1.0774792 | 0.2850087 | |
| Molysis | -1.6973122 | 2.203088 | 69.100187 | -0.7704240 | 0.4436746 | |
| QIAamp | -0.5034133 | 2.204324 | 69.111052 | -0.2283754 | 0.8200288 | |
| log10(Final_reads) | 11.7739846 | 1.017878 | 76.540074 | 11.5671904 | 0.0000000 |
|
| Mock * lyPMA | 9.2027877 | 3.314878 | 70.208146 | 2.7762074 | 0.0070450 |
|
| Nasal * lyPMA | 2.4311825 | 3.179741 | 72.982926 | 0.7645850 | 0.4469838 | |
| Mock * Benzonase | -13.9251284 | 3.110131 | 69.065784 | -4.4773450 | 0.0000291 |
|
| Nasal * Benzonase | -1.2838446 | 3.049614 | 72.822187 | -0.4209859 | 0.6750053 | |
| Mock * Host zero | -11.2953122 | 3.117133 | 69.109488 | -3.6236226 | 0.0005505 |
|
| Nasal * Host zero | 5.4823784 | 3.112760 | 73.257010 | 1.7612597 | 0.0823672 | |
| Mock * Molysis | -10.0110464 | 3.113275 | 69.085451 | -3.2156003 | 0.0019810 |
|
| Nasal * Molysis | 5.0599707 | 3.043042 | 72.398920 | 1.6628002 | 0.1006761 | |
| Mock * QIAamp | -15.2278909 | 3.109532 | 69.062027 | -4.8971656 | 0.0000062 |
|
| Nasal * QIAamp | -4.8379307 | 3.176346 | 72.738691 | -1.5231120 | 0.1320667 |
Species richness (BAL + Pos + Neg)
No changes observed
# Species richness for BAL together with the Mock and Neg. controls.
# NOTE(review): random intercept is subject_id here, while the model stated in
# the text uses (1|original_sample) - confirm which grouping is intended.
lmer(
  S.obs ~ sample_type * treatment + log10(Final_reads) + (1 | subject_id),
  data = subset(sample_data, sample_type %in% c("BAL", "Mock", "Neg."))
) %>%
  summary() %>%
  coef() %>%
  data.frame(check.names = FALSE) %>%
  # Flag coefficients with p < 0.05 in an unnamed column.
  mutate(
    ` ` = case_when(
      abs(`Pr(>|t|)`) < 0.05 ~ "*",
      .default = " "
    )
  ) %>%
  # Shorten row labels: drop the variable-name prefixes, show ":" as " * ".
  rownames_to_column(var = "term") %>%
  mutate(term = gsub(":", " * ", gsub("treatment|sample_type", "", term))) %>%
  column_to_rownames(var = "term") %>%
  kbl(format = "html") %>%
  kable_styling(full_width = FALSE, html_font = "serif")
| Estimate | Std. Error | df | t value | Pr(>|t|) | ||
|---|---|---|---|---|---|---|
| (Intercept) | -86.9419958 | 14.195115 | 13.415935 | -6.1247830 | 0.0000317 |
|
| Mock | -0.6787926 | 14.953730 | 4.410950 | -0.0453929 | 0.9657761 | |
| BAL | 5.4704776 | 11.638199 | 4.497464 | 0.4700450 | 0.6602220 | |
| lyPMA | -3.0591579 | 3.567716 | 67.090622 | -0.8574556 | 0.3942459 | |
| Benzonase | -2.1232410 | 3.518550 | 67.045378 | -0.6034420 | 0.5482509 | |
| Host zero | -3.4619661 | 3.541215 | 67.066496 | -0.9776209 | 0.3317752 | |
| Molysis | -2.3047390 | 3.524548 | 67.051011 | -0.6539104 | 0.5154060 | |
| QIAamp | -1.1860761 | 3.526596 | 67.052926 | -0.3363232 | 0.7376774 | |
| log10(Final_reads) | 16.0720161 | 1.657634 | 68.551578 | 9.6957548 | 0.0000000 |
|
| Mock * lyPMA | 14.0589008 | 5.314393 | 67.247488 | 2.6454390 | 0.0101480 |
|
| BAL * lyPMA | -1.3949813 | 5.407089 | 67.043923 | -0.2579912 | 0.7972040 | |
| Mock * Benzonase | -13.5692567 | 4.975335 | 67.044947 | -2.7273052 | 0.0081427 |
|
| BAL * Benzonase | -5.7266255 | 5.453212 | 67.394364 | -1.0501381 | 0.2974058 | |
| Mock * Host zero | -10.3444846 | 4.986942 | 67.052650 | -2.0743140 | 0.0418926 |
|
| BAL * Host zero | -2.2264993 | 5.435884 | 67.380448 | -0.4095929 | 0.6834038 | |
| Mock * Molysis | -9.3214938 | 4.980547 | 67.048413 | -1.8715804 | 0.0656278 | |
| BAL * Molysis | 9.0402931 | 5.520357 | 67.444404 | 1.6376285 | 0.1061570 | |
| Mock * QIAamp | -14.9825410 | 4.974342 | 67.044285 | -3.0119646 | 0.0036587 |
|
| BAL * QIAamp | -5.2759699 | 5.517870 | 67.442644 | -0.9561607 | 0.3424060 |
Species richness (sputum + Pos + Neg)
Benzonase may have increased species richness of sputum
# Species richness for sputum together with the Mock and Neg. controls.
# NOTE(review): random intercept is original_sample here, while the nasal and
# BAL strata use subject_id - confirm which grouping is intended.
lmer(
  S.obs ~ sample_type * treatment + log10(Final_reads) + (1 | original_sample),
  data = subset(sample_data, sample_type %in% c("Sputum", "Mock", "Neg."))
) %>%
  summary() %>%
  coef() %>%
  data.frame(check.names = FALSE) %>%
  # Flag coefficients with p < 0.05 in an unnamed column.
  mutate(
    ` ` = case_when(
      abs(`Pr(>|t|)`) < 0.05 ~ "*",
      .default = " "
    )
  ) %>%
  # Shorten row labels: drop the variable-name prefixes, show ":" as " * ".
  rownames_to_column(var = "term") %>%
  mutate(term = gsub(":", " * ", gsub("treatment|sample_type", "", term))) %>%
  column_to_rownames(var = "term") %>%
  kbl(format = "html") %>%
  kable_styling(full_width = FALSE, html_font = "serif")
| Estimate | Std. Error | df | t value | Pr(>|t|) | ||
|---|---|---|---|---|---|---|
| (Intercept) | -92.3236354 | 28.562823 | 13.478941 | -3.2323008 | 0.0062829 |
|
| Mock | -0.7292052 | 30.205870 | 4.419012 | -0.0241412 | 0.9817914 | |
| Sputum | 10.5636183 | 23.206927 | 4.277837 | 0.4551925 | 0.6711239 | |
| lyPMA | -2.0980392 | 5.952543 | 67.005568 | -0.3524610 | 0.7255988 | |
| Benzonase | -0.9540210 | 5.908482 | 67.002525 | -0.1614663 | 0.8722120 | |
| Host zero | -2.4218768 | 5.917865 | 67.003178 | -0.4092484 | 0.6836627 | |
| Molysis | -1.1874354 | 5.905160 | 67.002292 | -0.2010844 | 0.8412418 | |
| QIAamp | -0.0811347 | 29.557837 | 4.053008 | -0.0027449 | 0.9979397 | |
| log10(Final_reads) | 16.7782375 | 3.230107 | 67.206668 | 5.1943292 | 0.0000021 |
|
| Mock * lyPMA | 13.4493235 | 30.311724 | 4.481516 | 0.4437004 | 0.6778513 | |
| Sputum * lyPMA | 31.4365340 | 8.409957 | 67.005171 | 3.7380137 | 0.0003861 |
|
| Mock * Benzonase | -14.9182810 | 30.142296 | 4.382641 | -0.4949285 | 0.6444466 | |
| Sputum * Benzonase | 55.1618069 | 8.851291 | 67.024952 | 6.2320634 | 0.0000000 |
|
| Mock * Host zero | -11.5957501 | 30.142990 | 4.383043 | -0.3846914 | 0.7184121 | |
| Sputum * Host zero | 85.3463367 | 9.741528 | 67.056899 | 8.7610831 | 0.0000000 |
|
| Mock * Molysis | -10.6156900 | 30.141868 | 4.382393 | -0.3521908 | 0.7409875 | |
| Sputum * Molysis | 91.1969876 | 10.516606 | 67.078308 | 8.6717129 | 0.0000000 |
|
| Mock * QIAamp | -16.3497254 | 30.142897 | 4.382989 | -0.5424072 | 0.6139508 | |
| Sputum * QIAamp | 62.3266963 | 30.472969 | 4.577081 | 2.0453109 | 0.1014241 |
Simpson
Inverse Simpson of all samples:
Inverse Simpson ~ sample_type * treatment + log10(Final_reads) + (1|original_sample)
Stratified:
Inverse Simpson ~ treatment + (1|original_sample)
Inv Simp - ANOVA
Final reads did not affect inverse Simpson
# Inverse Simpson across all samples, including log10 sequencing depth as a
# covariate, with a random intercept per subject_id.
lmer_invsimpson <- lmer(
  data_invsimpson ~ sample_type * treatment + log10(Final_reads) + (1 | subject_id),
  data = sample_data
)

# ANOVA table of the fixed effects; the extra unnamed column flags p < 0.05.
lmer_invsimpson %>%
  anova() %>%
  data.frame(check.names = FALSE) %>%
  mutate(
    ` ` = case_when(
      abs(`Pr(>F)`) < 0.05 ~ "*",
      .default = " "
    )
  ) %>%
  # Show interaction rows with " * " instead of ":".
  rownames_to_column(var = "term") %>%
  mutate(term = gsub(":", " * ", term)) %>%
  column_to_rownames(var = "term") %>%
  kbl(format = "html") %>%
  kable_styling(full_width = FALSE, html_font = "serif")
| Sum Sq | Mean Sq | NumDF | DenDF | F value | Pr(>F) | ||
|---|---|---|---|---|---|---|---|
| sample_type | 89.193944 | 22.298486 | 4 | 16.14294 | 5.8258848 | 0.0042571 |
|
| treatment | 45.186056 | 9.037211 | 5 | 112.77039 | 2.3611357 | 0.0444302 |
|
| log10(Final_reads) | 1.333353 | 1.333353 | 1 | 120.16332 | 0.3483626 | 0.5561498 | |
| sample_type * treatment | 236.399387 | 11.819969 | 20 | 110.50529 | 3.0881819 | 0.0000855 |
|
# Refit the inverse Simpson model without the log10(Final_reads) covariate;
# this overwrites the earlier lmer_invsimpson fit.
lmer_invsimpson <- lmer(
  data_invsimpson ~ sample_type * treatment + (1 | subject_id),
  data = sample_data
)
Simpson (all samples & interaction term)
Sputum after treatment showed differences - stratified analysis is required
# Simpson: fixed-effect coefficients of the current lmer_invsimpson fit.
lmer_invsimpson %>%
  summary() %>%
  coef() %>%
  data.frame(check.names = FALSE) %>%
  # Flag coefficients with p < 0.05 in an unnamed column.
  mutate(
    ` ` = case_when(
      abs(`Pr(>|t|)`) < 0.05 ~ "*",
      .default = " "
    )
  ) %>%
  # Shorten row labels: drop the variable-name prefixes, show ":" as " * ".
  rownames_to_column(var = "term") %>%
  mutate(term = gsub(":", " * ", gsub("treatment|sample_type", "", term))) %>%
  column_to_rownames(var = "term") %>%
  kbl(format = "html") %>%
  kable_styling(full_width = FALSE, html_font = "serif")
| Estimate | Std. Error | df | t value | Pr(>|t|) | ||
|---|---|---|---|---|---|---|
| (Intercept) | 3.1596359 | 2.110581 | 16.55212 | 1.4970458 | 0.1532070 | |
| Mock | 2.1313077 | 2.984812 | 16.55212 | 0.7140510 | 0.4851440 | |
| BAL | -0.4036662 | 2.491790 | 22.12652 | -0.1619985 | 0.8727773 | |
| Nasal | -1.0199272 | 2.284181 | 18.73857 | -0.4465177 | 0.6603407 | |
| Sputum | -0.2566351 | 2.445488 | 20.64163 | -0.1049423 | 0.9174347 | |
| lyPMA | -0.6047497 | 1.181903 | 109.86264 | -0.5116745 | 0.6099058 | |
| Benzonase | -0.4369252 | 1.181903 | 109.86264 | -0.3696794 | 0.7123330 | |
| Host zero | -0.4071159 | 1.181903 | 109.86264 | -0.3444579 | 0.7311604 | |
| Molysis | -0.2823875 | 1.181903 | 109.86264 | -0.2389261 | 0.8116080 | |
| QIAamp | 0.5267480 | 1.181903 | 109.86264 | 0.4456778 | 0.6567070 | |
| Mock * lyPMA | 3.0329696 | 1.671463 | 109.86264 | 1.8145593 | 0.0723204 | |
| BAL * lyPMA | 0.3912303 | 1.817072 | 109.86264 | 0.2153080 | 0.8299264 | |
| Nasal * lyPMA | 0.5572614 | 1.636030 | 114.30731 | 0.3406181 | 0.7340163 | |
| Sputum * lyPMA | 4.1940580 | 1.709029 | 109.86264 | 2.4540589 | 0.0156959 |
|
| Mock * Benzonase | 0.6013923 | 1.671463 | 109.86264 | 0.3597999 | 0.7196875 | |
| BAL * Benzonase | 0.0152940 | 1.774650 | 110.45106 | 0.0086180 | 0.9931395 | |
| Nasal * Benzonase | 0.8370388 | 1.639407 | 114.85265 | 0.5105741 | 0.6106292 | |
| Sputum * Benzonase | 8.7134085 | 1.709029 | 109.86264 | 5.0984553 | 0.0000014 |
|
| Mock * Host zero | -0.7335174 | 1.671463 | 109.86264 | -0.4388474 | 0.6616340 | |
| BAL * Host zero | -0.6540204 | 1.774650 | 110.45106 | -0.3685349 | 0.7131799 | |
| Nasal * Host zero | 0.5651256 | 1.639407 | 114.85265 | 0.3447134 | 0.7309403 | |
| Sputum * Host zero | 6.6176648 | 1.709029 | 109.86264 | 3.8721780 | 0.0001836 |
|
| Mock * Molysis | -0.7440630 | 1.671463 | 109.86264 | -0.4451566 | 0.6570825 | |
| BAL * Molysis | 2.6186628 | 1.774650 | 110.45106 | 1.4755938 | 0.1428981 | |
| Nasal * Molysis | 0.9316357 | 1.636030 | 114.30731 | 0.5694490 | 0.5701688 | |
| Sputum * Molysis | 6.9900959 | 1.709029 | 109.86264 | 4.0900976 | 0.0000824 |
|
| Mock * QIAamp | -1.4438502 | 1.671463 | 109.86264 | -0.8638240 | 0.3895666 | |
| BAL * QIAamp | -1.3151584 | 1.774650 | 110.45106 | -0.7410804 | 0.4602174 | |
| Nasal * QIAamp | -0.7602948 | 1.639407 | 114.85265 | -0.4637621 | 0.6436959 | |
| Sputum * QIAamp | 4.3874377 | 1.709029 | 109.86264 | 2.5672107 | 0.0115965 |
|
Inverse Simpson - stratified (Untreateds)
Inverse Simpson ~ sample_type + log10 (Final_reads) + (1|original_sample)
Mock community treated with lyPMA showed changes in alpha diversity
# Inverse Simpson in the controls only (Mock and Neg.), random intercept per
# subject_id.
# NOTE(review): the section heading says "Untreateds" and the stated formula
# includes log10(Final_reads) and (1|original_sample); the code fits neither -
# confirm which is intended.
lmer(
  data_invsimpson ~ sample_type * treatment + (1 | subject_id),
  data = subset(sample_data, sample_type %in% c("Mock", "Neg."))
) %>%
  summary() %>%
  coef() %>%
  data.frame(check.names = FALSE) %>%
  # Flag coefficients with p < 0.05 in an unnamed column.
  mutate(
    ` ` = case_when(
      abs(`Pr(>|t|)`) < 0.05 ~ "*",
      .default = " "
    )
  ) %>%
  # Shorten row labels: drop the variable-name prefixes, show ":" as " * ".
  rownames_to_column(var = "term") %>%
  mutate(term = gsub(":", " * ", gsub("treatment|sample_type", "", term))) %>%
  column_to_rownames(var = "term") %>%
  kbl(format = "html") %>%
  kable_styling(full_width = FALSE, html_font = "serif")
| Estimate | Std. Error | df | t value | Pr(>|t|) | ||
|---|---|---|---|---|---|---|
| (Intercept) | 3.1596359 | 1.1263953 | 50 | 2.8050862 | 0.0071468 |
|
| Mock | 2.1313077 | 1.5929635 | 50 | 1.3379514 | 0.1869658 | |
| lyPMA | -0.6047497 | 0.7881973 | 50 | -0.7672567 | 0.4465369 | |
| Benzonase | -0.4369252 | 0.7881973 | 50 | -0.5543348 | 0.5818208 | |
| Host zero | -0.4071159 | 0.7881973 | 50 | -0.5165152 | 0.6077717 | |
| Molysis | -0.2823875 | 0.7881973 | 50 | -0.3582700 | 0.7216495 | |
| QIAamp | 0.5267480 | 0.7881973 | 50 | 0.6682946 | 0.5070191 | |
| Mock * lyPMA | 3.0329696 | 1.1146793 | 50 | 2.7209346 | 0.0089329 |
|
| Mock * Benzonase | 0.6013923 | 1.1146793 | 50 | 0.5395205 | 0.5919225 | |
| Mock * Host zero | -0.7335174 | 1.1146793 | 50 | -0.6580524 | 0.5135230 | |
| Mock * Molysis | -0.7440630 | 1.1146793 | 50 | -0.6675130 | 0.5075138 | |
| Mock * QIAamp | -1.4438502 | 1.1146793 | 50 | -1.2953054 | 0.2011637 |
Inverse Simpson - stratified (NS + Pos + Neg)
Inverse Simpson ~ sample_type + log10 (Final_reads) + (1|original_sample)
Mock community treated with lyPMA only showed changes in alpha diversity.
# Inverse Simpson for nasal swabs together with the Mock and Neg. controls,
# random intercept per subject_id.
# NOTE(review): the formula stated in the text includes log10(Final_reads) and
# (1|original_sample); the code fits neither - confirm which is intended.
lmer(
  data_invsimpson ~ sample_type * treatment + (1 | subject_id),
  data = subset(sample_data, sample_type %in% c("Nasal", "Mock", "Neg."))
) %>%
  summary() %>%
  coef() %>%
  data.frame(check.names = FALSE) %>%
  # Flag coefficients with p < 0.05 in an unnamed column.
  mutate(
    ` ` = case_when(
      abs(`Pr(>|t|)`) < 0.05 ~ "*",
      .default = " "
    )
  ) %>%
  # Shorten row labels: drop the variable-name prefixes, show ":" as " * ".
  rownames_to_column(var = "term") %>%
  mutate(term = gsub(":", " * ", gsub("treatment|sample_type", "", term))) %>%
  column_to_rownames(var = "term") %>%
  kbl(format = "html") %>%
  kable_styling(full_width = FALSE, html_font = "serif")
| Estimate | Std. Error | df | t value | Pr(>|t|) | ||
|---|---|---|---|---|---|---|
| (Intercept) | 3.1596359 | 0.5855932 | 4.631504 | 5.3956162 | 0.0037204 |
|
| Mock | 2.1313077 | 0.8281538 | 4.631504 | 2.5735652 | 0.0535708 | |
| Nasal | -1.0199272 | 0.6972692 | 7.720477 | -1.4627452 | 0.1830195 | |
| lyPMA | -0.6047497 | 0.6924462 | 71.894032 | -0.8733527 | 0.3853787 | |
| Benzonase | -0.4369252 | 0.6924462 | 71.894032 | -0.6309880 | 0.5300493 | |
| Host zero | -0.4071159 | 0.6924462 | 71.894032 | -0.5879387 | 0.5584158 | |
| Molysis | -0.2823875 | 0.6924462 | 71.894032 | -0.4078114 | 0.6846240 | |
| QIAamp | 0.5267480 | 0.6924462 | 71.894032 | 0.7607061 | 0.4493204 | |
| Mock * lyPMA | 3.0329696 | 0.9792668 | 71.894032 | 3.0971842 | 0.0027859 |
|
| Nasal * lyPMA | 0.4293667 | 0.9393367 | 76.139565 | 0.4570957 | 0.6489038 | |
| Mock * Benzonase | 0.6013923 | 0.9792668 | 71.894032 | 0.6141251 | 0.5410712 | |
| Nasal * Benzonase | 0.8944160 | 0.9394703 | 76.292675 | 0.9520429 | 0.3440818 | |
| Mock * Host zero | -0.7335174 | 0.9792668 | 71.894032 | -0.7490476 | 0.4562724 | |
| Nasal * Host zero | 0.6225028 | 0.9394703 | 76.292675 | 0.6626104 | 0.5095770 | |
| Mock * Molysis | -0.7440630 | 0.9792668 | 71.894032 | -0.7598164 | 0.4498488 | |
| Nasal * Molysis | 1.0595304 | 0.9393367 | 76.139565 | 1.1279559 | 0.2628804 | |
| Mock * QIAamp | -1.4438502 | 0.9792668 | 71.894032 | -1.4744196 | 0.1447343 | |
| Nasal * QIAamp | -0.8176720 | 0.9394703 | 76.292675 | -0.8703543 | 0.3868371 |
Inverse Simpson - stratified (BAL + Pos + Neg)
Nothing changed in BAL
# Inverse Simpson for BAL together with the Mock and Neg. controls.
# NOTE(review): random intercept is original_sample here, while the control-only
# and nasal strata use subject_id - confirm which grouping is intended.
lmer(
  data_invsimpson ~ sample_type * treatment + (1 | original_sample),
  data = subset(sample_data, sample_type %in% c("BAL", "Mock", "Neg."))
) %>%
  summary() %>%
  coef() %>%
  data.frame(check.names = FALSE) %>%
  # Flag coefficients with p < 0.05 in an unnamed column.
  mutate(
    ` ` = case_when(
      abs(`Pr(>|t|)`) < 0.05 ~ "*",
      .default = " "
    )
  ) %>%
  # Shorten row labels: drop the variable-name prefixes, show ":" as " * ".
  rownames_to_column(var = "term") %>%
  mutate(term = gsub(":", " * ", gsub("treatment|sample_type", "", term))) %>%
  column_to_rownames(var = "term") %>%
  kbl(format = "html") %>%
  kable_styling(full_width = FALSE, html_font = "serif")
| Estimate | Std. Error | df | t value | Pr(>|t|) | ||
|---|---|---|---|---|---|---|
| (Intercept) | 3.5745359 | 1.301083 | 3.746913 | 2.7473533 | 0.0553671 | |
| Mock | 1.5373505 | 1.840010 | 3.746913 | 0.8355121 | 0.4533865 | |
| BAL | -0.9457293 | 1.660635 | 6.770497 | -0.5694987 | 0.5874053 | |
| lyPMA | -1.0196497 | 1.157423 | 65.468387 | -0.8809656 | 0.3815592 | |
| Benzonase | -0.8518252 | 1.157423 | 65.468387 | -0.7359672 | 0.4643795 | |
| Host zero | -0.8220158 | 1.157423 | 65.468387 | -0.7102123 | 0.4800948 | |
| Molysis | -0.6972874 | 1.157423 | 65.468387 | -0.6024484 | 0.5489568 | |
| QIAamp | 0.1118481 | 1.840010 | 3.746913 | 0.0607867 | 0.9546317 | |
| Mock * lyPMA | 3.6269268 | 2.173767 | 7.266306 | 1.6684983 | 0.1375765 | |
| BAL * lyPMA | 0.8061302 | 1.736134 | 65.468387 | 0.4643249 | 0.6439554 | |
| Mock * Benzonase | 1.1953495 | 2.173767 | 7.266306 | 0.5498977 | 0.5988779 | |
| BAL * Benzonase | 0.5573570 | 1.694028 | 66.295151 | 0.3290129 | 0.7431831 | |
| Mock * Host zero | -0.1395602 | 2.173767 | 7.266306 | -0.0642020 | 0.9505401 | |
| BAL * Host zero | -0.1119574 | 1.694028 | 66.295151 | -0.0660895 | 0.9475055 | |
| Mock * Molysis | -0.1501057 | 2.173767 | 7.266306 | -0.0690533 | 0.9468093 | |
| BAL * Molysis | 3.1607258 | 1.694028 | 66.295151 | 1.8658053 | 0.0664936 | |
| Mock * QIAamp | -0.8498929 | 2.173767 | 7.266306 | -0.3909770 | 0.7070259 | |
| BAL * QIAamp | -0.7730954 | 2.217147 | 7.770971 | -0.3486894 | 0.7365808 |
Inverse Simpson - stratified (spt + Pos + Neg)
Sputum changed after some treatment - but their changes were not treatment global.
# Inverse Simpson for sputum together with the Mock and Neg. controls.
# Fix: the formula contained a stray doubled "+ +" before the random-effect
# term; it is removed here (the set of model terms is unchanged).
# NOTE(review): unlike the other inverse Simpson strata, this model keeps
# log10(Final_reads) and uses original_sample (not subject_id) as the grouping
# factor - confirm that both are intended.
lmer(
  data_invsimpson ~ sample_type * treatment + log10(Final_reads) + (1 | original_sample),
  data = subset(sample_data, sample_type %in% c("Sputum", "Mock", "Neg."))
) %>%
  summary() %>%
  coef() %>%
  data.frame(check.names = FALSE) %>%
  # Flag coefficients with p < 0.05 in an unnamed column.
  mutate(
    ` ` = case_when(
      abs(`Pr(>|t|)`) < 0.05 ~ "*",
      .default = " "
    )
  ) %>%
  # Shorten row labels: drop the variable-name prefixes, show ":" as " * ".
  rownames_to_column(var = "term") %>%
  mutate(term = gsub(":", " * ", gsub("treatment|sample_type", "", term))) %>%
  column_to_rownames(var = "term") %>%
  kbl(format = "html") %>%
  kable_styling(full_width = FALSE, html_font = "serif")
| Estimate | Std. Error | df | t value | Pr(>|t|) | ||
|---|---|---|---|---|---|---|
| (Intercept) | -1.8297432 | 5.6033292 | 16.558650 | -0.3265457 | 0.7481026 | |
| Mock | -0.1903964 | 5.6258760 | 4.532445 | -0.0338430 | 0.9744414 | |
| Sputum | -0.4917756 | 4.3134952 | 4.352519 | -0.1140086 | 0.9142991 | |
| lyPMA | -1.2279698 | 1.2388830 | 67.006745 | -0.9911911 | 0.3251582 | |
| Benzonase | -0.7959257 | 1.2297164 | 67.002900 | -0.6472433 | 0.5196859 | |
| Host zero | -0.9300696 | 1.2316684 | 67.003726 | -0.7551299 | 0.4528173 | |
| Molysis | -0.7073044 | 1.2290252 | 67.002606 | -0.5755003 | 0.5668808 | |
| QIAamp | 0.0861351 | 5.4748103 | 4.066784 | 0.0157330 | 0.9881891 | |
| log10(Final_reads) | 0.8966675 | 0.6721363 | 67.260263 | 1.3340561 | 0.1866850 | |
| Mock * lyPMA | 4.5167048 | 5.6505287 | 4.612797 | 0.7993420 | 0.4632439 | |
| Sputum * lyPMA | 4.3330105 | 1.7503369 | 67.006243 | 2.4755294 | 0.0158384 |
|
| Mock * Benzonase | 1.1462745 | 5.6111475 | 4.486335 | 0.2042852 | 0.8470693 | |
| Sputum * Benzonase | 8.3139445 | 1.8421544 | 67.031236 | 4.5131639 | 0.0000265 |
|
| Mock * Host zero | -0.0645138 | 5.6113089 | 4.486848 | -0.0114971 | 0.9913202 | |
| Sputum * Host zero | 5.6401972 | 2.0273686 | 67.071576 | 2.7820285 | 0.0070083 |
|
| Mock * Molysis | -0.1295673 | 5.6110480 | 4.486019 | -0.0230915 | 0.9825686 | |
| Sputum * Molysis | 5.6305507 | 2.1886282 | 67.098590 | 2.5726392 | 0.0123107 |
|
| Mock * QIAamp | -0.9220254 | 5.6112874 | 4.486780 | -0.1643162 | 0.8766238 | |
| Sputum * QIAamp | 3.5585589 | 5.6879574 | 4.735317 | 0.6256304 | 0.5604643 |
*** Results: ***
3.1. Species richness - type * method specific. Sputum showed the largest changes, with every method
3.2. Stratified analysis showed that some methods increased some alpha diversity indices. Changes were largest in sputum. However, stratified analysis showed Benzonase was the only one that showed significant changes.
A4. Taxa beta diversity
Permanova (Taxa dist ~ log10(final reads) + sample_type + treatment + sample_type * treatment + subject_id) –> both stratified and nonstratified
Beta diversity figures
# PERMANOVA (vegan::adonis2) on Bray-Curtis distances of relative abundances.
# Every model uses 10000 permutations; all except bray_perm_uni pass subject_id
# as `strata`.
# NOTE(review): no set.seed() call is visible in this chunk, so permutation
# p-values may differ slightly between runs - confirm a seed is set upstream.

# Samples with non-zero richness from the five sample types analysed.
phyloseq_rel_nz <- subset_samples(
  phyloseq$phyloseq_rel,
  S.obs != 0 & sample_type %in% c("BAL", "Nasal", "Sputum", "Mock", "Neg.")
)

# --- All samples --------------------------------------------------------------

# subject_id as a model term, no strata.
bray_perm_uni <- vegan::adonis2(
  distance(phyloseq_rel_nz, method = "bray") ~
    sample_type + log10(Final_reads) + treatment + subject_id,
  data = data.frame(sample_data(phyloseq_rel_nz), check.names = FALSE),
  permutations = 10000
)

# Same fixed terms without subject_id, permuting within subject_id.
bray_perm_uni_strata <- vegan::adonis2(
  distance(phyloseq_rel_nz, method = "bray") ~
    sample_type + log10(Final_reads) + treatment,
  data = data.frame(sample_data(phyloseq_rel_nz), check.names = FALSE),
  strata = data.frame(sample_data(phyloseq_rel_nz), check.names = FALSE)$subject_id,
  permutations = 10000
)

# One term per treatment column instead of the single treatment factor.
bray_perm_strata <- vegan::adonis2(
  distance(phyloseq_rel_nz, method = "bray") ~
    sample_type + log10(Final_reads) + lypma + benzonase + host_zero + molysis + qiaamp,
  data = data.frame(sample_data(phyloseq_rel_nz), check.names = FALSE),
  strata = data.frame(sample_data(phyloseq_rel_nz), check.names = FALSE)$subject_id,
  permutations = 10000
)

# Sample type x treatment interaction.
bray_perm_inter <- vegan::adonis2(
  distance(phyloseq_rel_nz, method = "bray") ~
    sample_type * treatment + log10(Final_reads),
  data = data.frame(sample_data(phyloseq_rel_nz), check.names = FALSE),
  strata = data.frame(sample_data(phyloseq_rel_nz), check.names = FALSE)$subject_id,
  permutations = 10000
)

# --- Stratified: one sample type at a time --------------------------------------

bray_perm_ns <- vegan::adonis2(
  distance(subset_samples(phyloseq_rel_nz, sample_type == "Nasal"), method = "bray") ~
    lypma + benzonase + host_zero + molysis + qiaamp + log10(Final_reads),
  data = data.frame(
    sample_data(subset_samples(phyloseq_rel_nz, sample_type == "Nasal")),
    check.names = FALSE
  ),
  strata = data.frame(
    sample_data(subset_samples(phyloseq_rel_nz, sample_type == "Nasal")),
    check.names = FALSE
  )$subject_id,
  permutations = 10000
)

bray_perm_bal <- vegan::adonis2(
  distance(subset_samples(phyloseq_rel_nz, sample_type == "BAL"), method = "bray") ~
    lypma + benzonase + host_zero + molysis + qiaamp + log10(Final_reads),
  data = data.frame(
    sample_data(subset_samples(phyloseq_rel_nz, sample_type == "BAL")),
    check.names = FALSE
  ),
  strata = data.frame(
    sample_data(subset_samples(phyloseq_rel_nz, sample_type == "BAL")),
    check.names = FALSE
  )$subject_id,
  permutations = 10000
)

bray_perm_spt <- vegan::adonis2(
  distance(subset_samples(phyloseq_rel_nz, sample_type == "Sputum"), method = "bray") ~
    lypma + benzonase + host_zero + molysis + qiaamp + log10(Final_reads),
  data = data.frame(
    sample_data(subset_samples(phyloseq_rel_nz, sample_type == "Sputum")),
    check.names = FALSE
  ),
  strata = data.frame(
    sample_data(subset_samples(phyloseq_rel_nz, sample_type == "Sputum")),
    check.names = FALSE
  )$subject_id,
  permutations = 10000
)

bray_perm_pos <- vegan::adonis2(
  distance(subset_samples(phyloseq_rel_nz, sample_type == "Mock"), method = "bray") ~
    lypma + benzonase + host_zero + molysis + qiaamp + log10(Final_reads),
  data = data.frame(
    sample_data(subset_samples(phyloseq_rel_nz, sample_type == "Mock")),
    check.names = FALSE
  ),
  strata = data.frame(
    sample_data(subset_samples(phyloseq_rel_nz, sample_type == "Mock")),
    check.names = FALSE
  )$subject_id,
  permutations = 10000
)

bray_perm_neg <- vegan::adonis2(
  distance(subset_samples(phyloseq_rel_nz, sample_type == "Neg."), method = "bray") ~
    lypma + benzonase + host_zero + molysis + qiaamp + log10(Final_reads),
  data = data.frame(
    sample_data(subset_samples(phyloseq_rel_nz, sample_type == "Neg.")),
    check.names = FALSE
  ),
  strata = data.frame(
    sample_data(subset_samples(phyloseq_rel_nz, sample_type == "Neg.")),
    check.names = FALSE
  )$subject_id,
  permutations = 10000
)

# --- Stratified: one sample type plus the Mock and Neg. controls ----------------

bray_perm_ns_ctrl <- vegan::adonis2(
  distance(
    subset_samples(phyloseq_rel_nz, sample_type %in% c("Nasal", "Mock", "Neg.")),
    method = "bray"
  ) ~ sample_type + lypma + benzonase + host_zero + molysis + qiaamp + log10(Final_reads),
  data = data.frame(
    sample_data(subset_samples(phyloseq_rel_nz, sample_type %in% c("Nasal", "Mock", "Neg."))),
    check.names = FALSE
  ),
  strata = data.frame(
    sample_data(subset_samples(phyloseq_rel_nz, sample_type %in% c("Nasal", "Mock", "Neg."))),
    check.names = FALSE
  )$subject_id,
  permutations = 10000
)

bray_perm_bal_ctrl <- vegan::adonis2(
  distance(
    subset_samples(phyloseq_rel_nz, sample_type %in% c("BAL", "Mock", "Neg.")),
    method = "bray"
  ) ~ sample_type + lypma + benzonase + host_zero + molysis + qiaamp + log10(Final_reads),
  data = data.frame(
    sample_data(subset_samples(phyloseq_rel_nz, sample_type %in% c("BAL", "Mock", "Neg."))),
    check.names = FALSE
  ),
  strata = data.frame(
    sample_data(subset_samples(phyloseq_rel_nz, sample_type %in% c("BAL", "Mock", "Neg."))),
    check.names = FALSE
  )$subject_id,
  permutations = 10000
)

bray_perm_spt_ctrl <- vegan::adonis2(
  distance(
    subset_samples(phyloseq_rel_nz, sample_type %in% c("Sputum", "Mock", "Neg.")),
    method = "bray"
  ) ~ sample_type + lypma + benzonase + host_zero + molysis + qiaamp + log10(Final_reads),
  data = data.frame(
    sample_data(subset_samples(phyloseq_rel_nz, sample_type %in% c("Sputum", "Mock", "Neg."))),
    check.names = FALSE
  ),
  strata = data.frame(
    sample_data(subset_samples(phyloseq_rel_nz, sample_type %in% c("Sputum", "Mock", "Neg."))),
    check.names = FALSE
  )$subject_id,
  permutations = 10000
)
PCoA based on Bray-Curtis (all samples)
Based on distances, it seems like some of the negative controls were affected by some samples. Meanwhile, Positive controls (mock community) were close to BAL samples.
# PCoA of Bray-Curtis distances for all retained samples; colour = treatment,
# shape = sample type.
#
# Fixes:
# * scale_shape() no longer overrides `labels`. Labels are matched to the factor
#   levels by position, and the supplied order (BAL, Nasal, Sputum, Mock, Neg.)
#   does not match the level order implied by the model tables (Neg., Mock, BAL,
#   Nasal, Sputum), so the legend could name the wrong sample type. Without the
#   override the legend shows the actual level names.
# * Treatment legend label "QIAaamp" corrected to "QIAamp".
# NOTE(review): the hex codes look like ColorBrewer "Paired", not "Set1" - confirm.
ordinate(phyloseq_rel_nz, method = "PCoA", distance = "bray") %>%
  plot_ordination(phyloseq_rel_nz, ., col = "treatment", shape = "sample_type") +
  # Alternative palette (disabled):
  # scale_color_viridis(discrete = 6, name = "Treatment", labels = c("Untreated", "lyPMA", "Benzonase", "Host zero", "Molysis", "QIAamp")) +
  # colors from https://colorbrewer2.org/#type=qualitative&scheme=Paired&n=6
  scale_color_manual(
    values = c("#e31a1c", "#fb9a99", "#33a02c", "#b2df8a", "#1f78b4", "#a6cee3"),
    name = "Treatment",
    labels = c("Untreated", "lyPMA", "Benzonase", "Host zero", "Molysis", "QIAamp")
  ) +
  scale_shape(name = "Sample type") +
  geom_point(size = 3) +
  theme_classic(base_size = 12, base_family = "serif") +
  theme(
    plot.tag = element_text(size = 15),
    legend.spacing = unit(0, "cm"),
    legend.key.height = unit(0.4, "cm")
  ) + # legend.position = c(0.9, 0.4)
  labs(tag = "E")
PCoA based on Jaccard (all samples)
Jaccard dissimilarities (presence/absence) showed that the BAL and mock communities are distant. Some samples may overlap.
# PCoA of Jaccard dissimilarities for every sample in phyloseq_rel_nz,
# colored by treatment and shaped by sample type.
ordinate(phyloseq_rel_nz, method = "PCoA", distance = "jaccard") %>%
  plot_ordination(phyloseq_rel_nz, ., col = "treatment", shape = "sample_type") +
  # Alternative palette, kept for reference:
  # scale_color_viridis(discrete = 6, name = "Treatment",
  #   labels = c("Untreated", "lyPMA", "Benzonase", "Host zero", "Molysis", "QIAamp")) +
  # Colors from https://colorbrewer2.org/#type=qualitative&scheme=Set1&n=6
  # NOTE(review): no `breaks` are given, so colors and labels are matched to the
  # treatment values by position -- confirm the level order matches these labels.
  scale_color_manual(
    values = c("#e31a1c", "#fb9a99", "#33a02c", "#b2df8a", "#1f78b4", "#a6cee3"),
    name = "Treatment",
    labels = c("Untreated", "lyPMA", "Benzonase", "Host zero", "Molysis", "QIAamp")
  ) +
  # NOTE(review): shape labels are also positional -- confirm the sample_type
  # level order is BAL, Nasal, Sputum, Mock, Neg.
  scale_shape(name = "Sample type", labels = c("BAL", "Nasal", "Sputum", "Mock", "Neg.")) +
  geom_point(size = 3) +
  theme_classic(base_size = 12, base_family = "serif") +
  theme(
    plot.tag = element_text(size = 15),
    legend.spacing = unit(0, "cm"),
    legend.key.height = unit(0.4, "cm")
  ) + # legend.position = c(0.9, 0.4)
  labs(tag = "E")
Stratified Bray beta diversity (Mock)
Some treatments moved the samples away from the theoretical composition.
# PCoA of Bray-Curtis dissimilarities for the control samples other than the
# negative controls, colored by treatment.
# The ordination uses the subset without "Neg." samples, while the full
# phyloseq_control_rel object is handed to plot_ordination().
ordinate(subset_samples(phyloseq_control_rel, sample_type != "Neg."), method = "PCoA", distance = "bray") %>%
  plot_ordination(phyloseq_control_rel, ., col = "treatment") +
  # Alternative palette, kept for reference:
  # scale_color_viridis(discrete = 6, name = "Treatment",
  #   labels = c("Mock theoretical", "Untreated", "lyPMA", "Benzonase", "Host zero", "Molysis", "QIAamp")) +
  # Colors from https://colorbrewer2.org/#type=qualitative&scheme=Set1&n=6
  # The "-" treatment value is displayed as "Mock theoretical".
  scale_color_manual(
    values = c("black", "#e31a1c", "#fb9a99", "#33a02c", "#b2df8a", "#1f78b4", "#a6cee3"),
    name = "Treatment",
    breaks = c("-", "Untreated", "lyPMA", "Benzonase", "Host zero", "Molysis", "QIAamp"),
    labels = c("Mock theoretical", "Untreated", "lyPMA", "Benzonase", "Host zero", "Molysis", "QIAamp")
  ) +
  # scale_shape(name = "Sample type", labels = c("Mock theoretical", "Mock")) +
  geom_point(size = 3) +
  theme_classic(base_size = 12, base_family = "serif") +
  theme(
    plot.tag = element_text(size = 15),
    legend.spacing = unit(0, "cm"),
    legend.key.height = unit(0.4, "cm")
  ) # legend.position = c(0.9, 0.4)
# labs(tag = "E")
Stratified Jaccard beta diversity (Mock)
Some treatments moved the samples away from the theoretical composition.
# PCoA of Jaccard dissimilarities for the control samples other than the
# negative controls, colored by treatment.
# The ordination uses the subset without "Neg." samples, while the full
# phyloseq_control_rel object is handed to plot_ordination().
ordinate(subset_samples(phyloseq_control_rel, sample_type != "Neg."), method = "PCoA", distance = "jaccard") %>%
  plot_ordination(phyloseq_control_rel, ., col = "treatment") +
  # Colors from https://colorbrewer2.org/#type=qualitative&scheme=Set1&n=6
  # The "-" treatment value is displayed as "Mock theoretical".
  scale_color_manual(
    values = c("black", "#e31a1c", "#fb9a99", "#33a02c", "#b2df8a", "#1f78b4", "#a6cee3"),
    name = "Treatment",
    breaks = c("-", "Untreated", "lyPMA", "Benzonase", "Host zero", "Molysis", "QIAamp"),
    labels = c("Mock theoretical", "Untreated", "lyPMA", "Benzonase", "Host zero", "Molysis", "QIAamp")
  ) +
  # scale_shape(name = "Sample type", labels = c("Mock theoretical", "Mock")) +
  geom_point(size = 3) +
  theme_classic(base_size = 12, base_family = "serif") +
  theme(
    plot.tag = element_text(size = 15),
    legend.spacing = unit(0, "cm"),
    legend.key.height = unit(0.4, "cm")
  ) # legend.position = c(0.9, 0.4)
# labs(tag = "E")
Bar plot of mock community
Some taxa decreased greatly after certain treatments.
Differential abundance analysis should be conducted.
# Bar plot of the positive (mock) controls, filled by the 20 taxa with the
# largest summed abundance; every other taxon is pooled as "[Others]".
phyloseq_control_rel %>%
  subset_samples(sample_type != "Neg.") %>%
  tax_table(.) %>%
  # Start with every taxon labelled "[Others]" in a new species20 column.
  cbind(species20 = "[Others]") %>%
  {
    # Names of the 20 taxa with the largest taxa_sums() over the
    # non-negative-control samples.
    top20species <- head(taxa_sums(phyloseq_control_rel %>%
      subset_samples(sample_type != "Neg.")) %>%
      data.frame %>%
      arrange(-.) %>%
      row.names(), 20)
    .[top20species, "species20"] <- as.character(.[top20species, "Species"])
    # Format the legend labels: drop the "s__" prefix, replace underscores with
    # spaces and wrap in <i> tags (rendered by element_markdown() below).
    # The column is addressed by name rather than by position (was `.[, 8]`).
    .[, "species20"] <- .[, "species20"] %>%
      gsub("s__", "", .) %>%
      gsub("_", " ", .) %>%
      paste("<i>", ., "</i>", sep = "")
    # Put the extended taxonomy table back onto the same sample subset.
    phyloseq_temp <- phyloseq_control_rel %>%
      subset_samples(sample_type != "Neg.")
    tax_table(phyloseq_temp) <- tax_table(.)
    phyloseq_temp
  } %>%
  plot_bar(., fill = "species20") +
  ylab("Relative abundance") +
  theme_classic(base_size = 11, base_family = "serif") +
  ggtitle("Bar plot of positive controls") +
  theme(legend.text = element_markdown()) +
  guides(fill = guide_legend(title = "Top 20 species")) +
  facet_wrap(~ treatment, scales = "free_x", nrow = 1)
# there could be opportunistic pathogens...
Stratified Bray beta diversity (negative)
However, there seem to be no differences between the negative controls.
# PCoA of Bray-Curtis dissimilarities for the negative controls only,
# colored by treatment.
# The ordination uses the "Neg." subset, while the full phyloseq_control_rel
# object is handed to plot_ordination().
ordinate(subset_samples(phyloseq_control_rel, sample_type == "Neg."), method = "PCoA", distance = "bray") %>%
  plot_ordination(phyloseq_control_rel, ., col = "treatment") +
  # Colors from https://colorbrewer2.org/#type=qualitative&scheme=Set1&n=6
  scale_color_manual(
    values = c("#e31a1c", "#fb9a99", "#33a02c", "#b2df8a", "#1f78b4", "#a6cee3"),
    name = "Treatment",
    breaks = c("Untreated", "lyPMA", "Benzonase", "Host zero", "Molysis", "QIAamp"),
    labels = c("Untreated", "lyPMA", "Benzonase", "Host zero", "Molysis", "QIAamp")
  ) +
  # scale_shape(name = "Sample type", labels = c("Mock theoretical", "Mock")) +
  geom_point(size = 3) +
  theme_classic(base_size = 12, base_family = "serif") +
  theme(
    plot.tag = element_text(size = 15),
    legend.spacing = unit(0, "cm"),
    legend.key.height = unit(0.4, "cm")
  ) # legend.position = c(0.9, 0.4)
# labs(tag = "E")
Beta diversity boxplot
Distances between samples within each subject. Mean distance between control <-> treatment for each subject
# Beta-diversity distances for the boxplots.
# Long-format table of all pairwise Bray-Curtis distances; the columns
# iso1, iso2 and dist are used below.
bray_dist_long <- phyloseq_rel_nz %>%
  distance(method = "bray") %>%
  as.matrix() %>%
  melt_dist()

# Sample names are split on "_" into at most three pieces; the first two
# pieces are pasted back together as the sample id.
# (The same annotation could also be obtained by merging the metadata.)
names <- data.frame(str_split_fixed(bray_dist_long$iso1, "_", 3))
names2 <- data.frame(str_split_fixed(bray_dist_long$iso2, "_", 3))

bray_dist_long <- local({
  # Depletion method encoded in a sample name; names matching none of the
  # patterns are treated as untreated controls. The first match wins.
  method_of <- function(sample_name) {
    case_when(
      grepl("lyPMA", sample_name) ~ "lypma",
      grepl("ben", sample_name) ~ "benzonase",
      grepl("host", sample_name) ~ "host_zero",
      grepl("qia", sample_name) ~ "qiaamp",
      grepl("moly", sample_name) ~ "molysis",
      TRUE ~ "control"
    )
  }
  # Pool all positive controls as "Mock" and all negative controls as "Neg.";
  # every other id is kept unchanged.
  # NOTE(review): "n_" matches any id piece ending in n/N -- confirm that no
  # subject ids are caught by this pattern.
  pool_controls <- function(sample_id) {
    case_when(
      grepl("pos", sample_id, ignore.case = TRUE) ~ "Mock",
      grepl("neg|n_", sample_id, ignore.case = TRUE) ~ "Neg.",
      TRUE ~ sample_id
    )
  }
  annotated <- bray_dist_long
  annotated$sample_id_1 <- pool_controls(paste(names$X1, names$X2, sep = "_"))
  annotated$method_1 <- method_of(annotated$iso1)
  annotated$sample_id_2 <- pool_controls(paste(names2$X1, names2$X2, sep = "_"))
  annotated$method_2 <- method_of(annotated$iso2)
  annotated
})

# Keep only control-vs-treatment pairs within the same sample id.
path_bray_dist_long_within_sampleid_from_control <- local({
  pairs <- subset(
    bray_dist_long,
    sample_id_1 == sample_id_2 &                      # same sample id
      method_1 != method_2 &                          # different methods
      (method_1 == "control" | method_2 == "control") # one side is the control
  )
  # The non-control side of the pair names the treatment.
  pairs$treatment <- ifelse(pairs$method_1 == "control", pairs$method_2, pairs$method_1)
  # Sample type derived from the name of the first sample of the pair.
  pairs$sample_type <- case_when(
    grepl("NS", pairs$iso1) ~ "Nasal",
    grepl("CFB", pairs$iso1) ~ "Sputum",
    grepl("BAL", pairs$iso1) ~ "BAL",
    grepl("pos|POS", pairs$iso1) ~ "Mock",
    grepl("neg|N_EXT", pairs$iso1) ~ "Neg.",
    TRUE ~ NA_character_
  )
  pairs
})
# Boxplots of control-vs-treatment Bray-Curtis distances, one facet per
# sample type and one box per depletion method.
path_bray_dist_long_within_sampleid_from_control %>%
  mutate(sample_type = factor(sample_type, levels = c("Neg.", "Mock", "BAL", "Nasal", "Sputum"))) %>%
  ggplot(aes(y = dist, fill = treatment)) +
  geom_boxplot() +
  # scale_fill_manual(values = c(viridis(6)[2:6])) +
  # Colors from https://colorbrewer2.org/#type=qualitative&scheme=Set1&n=6
  # `treatment` holds lowercase character codes, so colors and labels are tied
  # to the codes explicitly; matching by position would follow alphabetical
  # order (benzonase, host_zero, lypma, ...) and mislabel the legend.
  scale_fill_manual(
    values = c(
      lypma = "#fb9a99",
      benzonase = "#33a02c",
      host_zero = "#b2df8a",
      molysis = "#1f78b4",
      qiaamp = "#a6cee3"
    ),
    breaks = c("lypma", "benzonase", "host_zero", "molysis", "qiaamp"),
    labels = c("lyPMA", "Benzonase", "Host zero", "Molysis", "QIAamp"),
    name = "Treatment"
  ) +
  ylab("Sample pair distances") +
  theme_classic(base_size = 12, base_family = "serif") +
  theme(
    plot.tag = element_text(size = 15),
    axis.text.x = element_blank(),
    axis.ticks.x = element_blank()
  ) +
  facet_wrap(~sample_type, ncol = 5)
PERMANOVA test results
Subject as fixed effect vs strata term
Subject as fixed effect
adonis (dist ~ sample_type + log10(Final_reads) + treated + subject)
With strata
adonis (dist ~ sample_type + log10(Final_reads) + treated, strata = subject)
Strata term was employed rather than fixed effect
# Render the PERMANOVA table for the model with subject id as a fixed effect.
bray_perm_uni %>%
  data.frame(check.names = FALSE) %>%
  rownames_to_column("row.names") %>%
  # Replace model term names with display labels; terms without a label
  # (e.g. Residual, Total) keep their own name instead of becoming NA.
  mutate(row.names = case_when(
    row.names == "sample_type" ~ "Sample type",
    row.names == "treatment" ~ "Treatment",
    row.names == "subject_id" ~ "Subject",
    row.names == "log10(Final_reads)" ~ "log10(Final reads)",
    .default = row.names
  )) %>%
  column_to_rownames("row.names") %>%
  # Flag significance on the unrounded p-values, then round for display, so
  # that p-values just below 0.05 are not rounded up to 0.05 and missed.
  mutate(` ` = case_when(`Pr(>F)` < 0.05 ~ "*", .default = " ")) %>%
  mutate(across(where(is.numeric), ~ round(.x, 3))) %>%
  kbl(format = "html", caption = "Subject id as fixed effect") %>%
  kable_styling(full_width = FALSE, html_font = "serif")
| | Df | SumOfSqs | R2 | F | Pr(>F) | |
|---|---|---|---|---|---|---|
| Sample type | 4 | 28.047 | 0.462 | 63.093 | 0 | * |
| log10(Final reads) | 1 | 1.268 | 0.021 | 11.410 | 0 | * |
| Treatment | 5 | 1.885 | 0.031 | 3.393 | 0 | * |
| Subject | 17 | 15.389 | 0.254 | 8.146 | 0 | * |
| Residual | 127 | 14.114 | 0.233 | NA | NA | |
| Total | 154 | 60.703 | 1.000 | NA | NA | |
# Render the PERMANOVA table for the model with subject id as strata term.
bray_perm_uni_strata %>%
  data.frame(check.names = FALSE) %>%
  rownames_to_column("row.names") %>%
  # Replace model term names with display labels; terms without a label
  # (e.g. Residual, Total) keep their own name instead of becoming NA.
  mutate(row.names = case_when(
    row.names == "sample_type" ~ "Sample type",
    row.names == "treatment" ~ "Treatment",
    row.names == "log10(Final_reads)" ~ "log10(Final reads)",
    .default = row.names
  )) %>%
  column_to_rownames("row.names") %>%
  # Flag significance on the unrounded p-values, then round for display, so
  # that p-values just below 0.05 are not rounded up to 0.05 and missed.
  mutate(` ` = case_when(`Pr(>F)` < 0.05 ~ "*", .default = " ")) %>%
  mutate(across(where(is.numeric), ~ round(.x, 3))) %>%
  kbl(format = "html", caption = "Subject id as strata term") %>%
  kable_styling(full_width = FALSE, html_font = "serif")
| | Df | SumOfSqs | R2 | F | Pr(>F) | |
|---|---|---|---|---|---|---|
| Sample type | 4 | 28.047 | 0.462 | 34.223 | 0 | * |
| log10(Final reads) | 1 | 1.268 | 0.021 | 6.189 | 0 | * |
| Treatment | 5 | 1.885 | 0.031 | 1.840 | 0 | * |
| Residual | 144 | 29.503 | 0.486 | NA | NA | |
| Total | 154 | 60.703 | 1.000 | NA | NA | |
Treatment significantly affected the beta-diversity.
Strata term is better representing our study aim.
Which method affected the community the most?
PERMANOVA on each treatment
dist ~ sample_type + log10(Final_reads) + lypma + benzonase + host_zero + molysis + qiaamp, strata = subject
QIAamp showed highest changes. But, it could be sample type specific.
# Render the PERMANOVA table with one term per depletion method
# (subject id as strata term).
bray_perm_strata %>%
  data.frame(check.names = FALSE) %>%
  rownames_to_column("row.names") %>%
  # Replace model term names with display labels; terms without a label
  # (e.g. Residual, Total) keep their own name instead of becoming NA.
  mutate(row.names = case_when(
    row.names == "sample_type" ~ "Sample type",
    row.names == "lypma" ~ "lyPMA",
    row.names == "benzonase" ~ "Benzonase",
    row.names == "host_zero" ~ "Host zero",
    row.names == "molysis" ~ "Molysis",
    row.names == "qiaamp" ~ "QIAamp",
    row.names == "log10(Final_reads)" ~ "log10(Final reads)",
    .default = row.names
  )) %>%
  column_to_rownames("row.names") %>%
  # Flag significance on the unrounded p-values, then round for display, so
  # that p-values just below 0.05 are not rounded up to 0.05 and missed.
  mutate(` ` = case_when(`Pr(>F)` < 0.05 ~ "*", .default = " ")) %>%
  mutate(across(where(is.numeric), ~ round(.x, 3))) %>%
  kbl(format = "html") %>%
  kable_styling(full_width = FALSE, html_font = "serif")
| | Df | SumOfSqs | R2 | F | Pr(>F) | |
|---|---|---|---|---|---|---|
| Sample type | 4 | 28.047 | 0.462 | 34.223 | 0.000 | * |
| log10(Final reads) | 1 | 1.268 | 0.021 | 6.189 | 0.000 | * |
| lyPMA | 1 | 0.166 | 0.003 | 0.810 | 0.317 | |
| Benzonase | 1 | 0.160 | 0.003 | 0.783 | 0.263 | |
| Host zero | 1 | 0.299 | 0.005 | 1.457 | 0.026 | * |
| Molysis | 1 | 0.343 | 0.006 | 1.675 | 0.005 | * |
| QIAamp | 1 | 0.917 | 0.015 | 4.477 | 0.000 | * |
| Residual | 144 | 29.503 | 0.486 | NA | NA | |
| Total | 154 | 60.703 | 1.000 | NA | NA | |
PERMANOVA with interaction term
dist ~ sample_type * treatment + log10(Final_reads), strata = subject
It was sample type specific. We need stratified analysis
# Render the PERMANOVA table for the model with a
# sample type x treatment interaction term.
bray_perm_inter %>%
  data.frame(check.names = FALSE) %>%
  rownames_to_column("row.names") %>%
  # Replace model term names with display labels; terms without a label
  # (e.g. Residual, Total) keep their own name instead of becoming NA.
  mutate(row.names = case_when(
    row.names == "sample_type" ~ "Sample type",
    row.names == "treatment" ~ "Treatment",
    row.names == "subject_id" ~ "Subject",
    row.names == "log10(Final_reads)" ~ "log10(Final reads)",
    row.names == "sample_type:treatment" ~ "Sample type * treatment",
    .default = row.names
  )) %>%
  column_to_rownames("row.names") %>%
  # Flag significance on the unrounded p-values, then round for display, so
  # that p-values just below 0.05 are not rounded up to 0.05 and missed.
  mutate(` ` = case_when(`Pr(>F)` < 0.05 ~ "*", .default = " ")) %>%
  mutate(across(where(is.numeric), ~ round(.x, 3))) %>%
  kbl(format = "html") %>%
  kable_styling(full_width = FALSE, html_font = "serif")
| | Df | SumOfSqs | R2 | F | Pr(>F) | |
|---|---|---|---|---|---|---|
| Sample type | 4 | 28.047 | 0.462 | 38.172 | 0 | * |
| Treatment | 5 | 1.990 | 0.033 | 2.166 | 0 | * |
| log10(Final reads) | 1 | 1.164 | 0.019 | 6.334 | 0 | * |
| Sample type * treatment | 20 | 6.726 | 0.111 | 1.831 | 0 | * |
| Residual | 124 | 22.777 | 0.375 | NA | NA | |
| Total | 154 | 60.703 | 1.000 | NA | NA | |
Stratified (Positive)
Samples changed considerably relative to the untreated controls after treatment. Differential abundance analysis should be conducted.
# Render the stratified PERMANOVA table for the positive (mock) controls.
bray_perm_pos %>%
  data.frame(check.names = FALSE) %>%
  rownames_to_column("row.names") %>%
  # Replace model term names with display labels; terms without a label
  # (e.g. Residual, Total) keep their own name instead of becoming NA.
  mutate(row.names = case_when(
    row.names == "lypma" ~ "lyPMA",
    row.names == "benzonase" ~ "Benzonase",
    row.names == "host_zero" ~ "Host zero",
    row.names == "molysis" ~ "Molysis",
    row.names == "qiaamp" ~ "QIAamp",
    row.names == "subject_id" ~ "Subject id",
    row.names == "log10(Final_reads)" ~ "log10(Final reads)",
    .default = row.names
  )) %>%
  column_to_rownames("row.names") %>%
  # Flag significance on the unrounded p-values, then round for display, so
  # that p-values just below 0.05 are not rounded up to 0.05 and missed.
  mutate(` ` = case_when(`Pr(>F)` < 0.05 ~ "*", .default = " ")) %>%
  mutate(across(where(is.numeric), ~ round(.x, 3))) %>%
  kbl(format = "html") %>%
  kable_styling(full_width = FALSE, html_font = "serif")
| | Df | SumOfSqs | R2 | F | Pr(>F) | |
|---|---|---|---|---|---|---|
| lyPMA | 1 | 0.518 | 0.155 | 58.411 | 0.000 | * |
| Benzonase | 1 | 0.179 | 0.054 | 20.208 | 0.000 | * |
| Host zero | 1 | 0.318 | 0.095 | 35.839 | 0.000 | * |
| Molysis | 1 | 0.640 | 0.192 | 72.234 | 0.000 | * |
| QIAamp | 1 | 1.457 | 0.437 | 164.410 | 0.000 | * |
| log10(Final reads) | 1 | 0.013 | 0.004 | 1.451 | 0.234 | |
| Residual | 24 | 0.213 | 0.064 | NA | NA | |
| Total | 30 | 3.338 | 1.000 | NA | NA | |
Stratified (Neg)
Samples changed relative to the untreated controls after treatment.
# Render the stratified PERMANOVA table for the negative controls.
bray_perm_neg %>%
  data.frame(check.names = FALSE) %>%
  rownames_to_column("row.names") %>%
  # Replace model term names with display labels; terms without a label
  # (e.g. Residual, Total) keep their own name instead of becoming NA.
  mutate(row.names = case_when(
    row.names == "lypma" ~ "lyPMA",
    row.names == "benzonase" ~ "Benzonase",
    row.names == "host_zero" ~ "Host zero",
    row.names == "molysis" ~ "Molysis",
    row.names == "qiaamp" ~ "QIAamp",
    row.names == "subject_id" ~ "Subject id",
    row.names == "log10(Final_reads)" ~ "log10(Final reads)",
    .default = row.names
  )) %>%
  column_to_rownames("row.names") %>%
  # Flag significance on the unrounded p-values, then round for display, so
  # that p-values just below 0.05 are not rounded up to 0.05 and missed.
  mutate(` ` = case_when(`Pr(>F)` < 0.05 ~ "*", .default = " ")) %>%
  mutate(across(where(is.numeric), ~ round(.x, 3))) %>%
  kbl(format = "html") %>%
  kable_styling(full_width = FALSE, html_font = "serif")
| | Df | SumOfSqs | R2 | F | Pr(>F) | |
|---|---|---|---|---|---|---|
| lyPMA | 1 | 0.162 | 0.025 | 0.974 | 0.417 | |
| Benzonase | 1 | 0.256 | 0.039 | 1.540 | 0.147 | |
| Host zero | 1 | 1.089 | 0.167 | 6.556 | 0.000 | * |
| Molysis | 1 | 0.132 | 0.020 | 0.793 | 0.584 | |
| QIAamp | 1 | 0.196 | 0.030 | 1.183 | 0.306 | |
| log10(Final reads) | 1 | 0.700 | 0.107 | 4.213 | 0.000 | * |
| Residual | 24 | 3.987 | 0.611 | NA | NA | |
| Total | 30 | 6.522 | 1.000 | NA | NA | |
Stratified (Nasal)
lyPMA affected Nasal samples.
# Render the stratified PERMANOVA table for the nasal samples.
bray_perm_ns %>%
  data.frame(check.names = FALSE) %>%
  rownames_to_column("row.names") %>%
  # Replace model term names with display labels; terms without a label
  # (e.g. Residual, Total) keep their own name instead of becoming NA.
  mutate(row.names = case_when(
    row.names == "lypma" ~ "lyPMA",
    row.names == "benzonase" ~ "Benzonase",
    row.names == "host_zero" ~ "Host zero",
    row.names == "molysis" ~ "Molysis",
    row.names == "qiaamp" ~ "QIAamp",
    row.names == "subject_id" ~ "Subject id",
    row.names == "log10(Final_reads)" ~ "log10(Final reads)",
    .default = row.names
  )) %>%
  column_to_rownames("row.names") %>%
  # Flag significance on the unrounded p-values, then round for display, so
  # that p-values just below 0.05 are not rounded up to 0.05 and missed.
  mutate(` ` = case_when(`Pr(>F)` < 0.05 ~ "*", .default = " ")) %>%
  mutate(across(where(is.numeric), ~ round(.x, 3))) %>%
  kbl(format = "html") %>%
  kable_styling(full_width = FALSE, html_font = "serif")
| | Df | SumOfSqs | R2 | F | Pr(>F) | |
|---|---|---|---|---|---|---|
| lyPMA | 1 | 0.730 | 0.120 | 4.873 | 0.000 | * |
| Benzonase | 1 | 0.191 | 0.031 | 1.277 | 0.101 | |
| Host zero | 1 | 0.171 | 0.028 | 1.143 | 0.421 | |
| Molysis | 1 | 0.137 | 0.022 | 0.914 | 0.066 | |
| QIAamp | 1 | 0.254 | 0.042 | 1.694 | 0.050 | |
| log10(Final reads) | 1 | 0.428 | 0.070 | 2.861 | 0.028 | * |
| Residual | 28 | 4.192 | 0.687 | NA | NA | |
| Total | 34 | 6.103 | 1.000 | NA | NA | |
Stratified (BAL)
QIAamp affected BAL samples
# Render the stratified PERMANOVA table for the BAL samples.
bray_perm_bal %>%
  data.frame(check.names = FALSE) %>%
  rownames_to_column("row.names") %>%
  # Replace model term names with display labels; terms without a label
  # (e.g. Residual, Total) keep their own name instead of becoming NA.
  mutate(row.names = case_when(
    row.names == "lypma" ~ "lyPMA",
    row.names == "benzonase" ~ "Benzonase",
    row.names == "host_zero" ~ "Host zero",
    row.names == "molysis" ~ "Molysis",
    row.names == "qiaamp" ~ "QIAamp",
    row.names == "subject_id" ~ "Subject id",
    row.names == "log10(Final_reads)" ~ "log10(Final reads)",
    .default = row.names
  )) %>%
  column_to_rownames("row.names") %>%
  # Flag significance on the unrounded p-values, then round for display, so
  # that p-values just below 0.05 are not rounded up to 0.05 and missed.
  mutate(` ` = case_when(`Pr(>F)` < 0.05 ~ "*", .default = " ")) %>%
  mutate(across(where(is.numeric), ~ round(.x, 3))) %>%
  kbl(format = "html") %>%
  kable_styling(full_width = FALSE, html_font = "serif")
| | Df | SumOfSqs | R2 | F | Pr(>F) | |
|---|---|---|---|---|---|---|
| lyPMA | 1 | 0.100 | 0.010 | 0.272 | 0.337 | |
| Benzonase | 1 | 0.025 | 0.003 | 0.068 | 0.987 | |
| Host zero | 1 | 0.086 | 0.009 | 0.235 | 0.517 | |
| Molysis | 1 | 0.085 | 0.009 | 0.230 | 0.573 | |
| QIAamp | 1 | 0.229 | 0.024 | 0.623 | 0.005 | * |
| log10(Final reads) | 1 | 1.482 | 0.152 | 4.028 | 0.022 | * |
| Residual | 21 | 7.726 | 0.794 | NA | NA | |
| Total | 27 | 9.734 | 1.000 | NA | NA | |
Stratified (spt)
Sputum was affected by Molysis and QIAamp.
# Render the stratified PERMANOVA table for the sputum samples.
bray_perm_spt %>%
  data.frame(check.names = FALSE) %>%
  rownames_to_column("row.names") %>%
  # Replace model term names with display labels; terms without a label
  # (e.g. Residual, Total) keep their own name instead of becoming NA.
  mutate(row.names = case_when(
    row.names == "lypma" ~ "lyPMA",
    row.names == "benzonase" ~ "Benzonase",
    row.names == "host_zero" ~ "Host zero",
    row.names == "molysis" ~ "Molysis",
    row.names == "qiaamp" ~ "QIAamp",
    row.names == "subject_id" ~ "Subject id",
    row.names == "log10(Final_reads)" ~ "log10(Final reads)",
    .default = row.names
  )) %>%
  column_to_rownames("row.names") %>%
  # Flag significance on the unrounded p-values, then round for display, so
  # that p-values just below 0.05 are not rounded up to 0.05 and missed.
  mutate(` ` = case_when(`Pr(>F)` < 0.05 ~ "*", .default = " ")) %>%
  mutate(across(where(is.numeric), ~ round(.x, 3))) %>%
  kbl(format = "html") %>%
  kable_styling(full_width = FALSE, html_font = "serif")
| | Df | SumOfSqs | R2 | F | Pr(>F) | |
|---|---|---|---|---|---|---|
| lyPMA | 1 | 0.139 | 0.020 | 0.633 | 0.196 | |
| Benzonase | 1 | 0.037 | 0.005 | 0.170 | 0.768 | |
| Host zero | 1 | 0.171 | 0.025 | 0.777 | 0.130 | |
| Molysis | 1 | 0.436 | 0.063 | 1.985 | 0.010 | * |
| QIAamp | 1 | 0.953 | 0.137 | 4.339 | 0.000 | * |
| log10(Final reads) | 1 | 0.172 | 0.025 | 0.783 | 0.354 | |
| Residual | 23 | 5.052 | 0.726 | NA | NA | |
| Total | 29 | 6.960 | 1.000 | NA | NA | |
Results:
4.1. The effect of each treatment on beta diversity was sample-type specific.
4.2. Nasal (NS) samples showed no significant change with the QIAamp method.
4.3. Sputum showed large changes after Molysis and QIAamp. However, a large change here may reflect higher (better) detection efficiency, so further analysis is required.
Intermediate results
# Summary table of the issues observed for each depletion method
# (columns) in each sample type (rows). NA means no issue was noted.
data.frame(
  lyPMA = c(
    "No increase in final reads",
    "No increase in final reads",
    "No increase in final reads"
  ),
  Benzonase = c(
    "No decrease in host %",
    "No decrease in host %",
    "No decrease in host %"
  ),
  `Host zero` = c(NA, NA, NA),
  Molysis = c(
    "No decrease in host %",
    "High chance of failure in library prep",
    NA
  ),
  QIAamp = c(
    "No decrease in host %",
    NA,
    "No decrease in host %"
  ),
  row.names = c("BAL", "Nasal", "Sputum"),
  # Keep "Host zero" as a column name with a space.
  check.names = FALSE
) %>%
  kbl(format = "html", caption = "Table of issues of each treatment method") %>%
  kable_styling(full_width = FALSE, html_font = "serif")
| | lyPMA | Benzonase | Host zero | Molysis | QIAamp |
|---|---|---|---|---|---|
| BAL | No increase in final reads | No decrease in host % | NA | No decrease in host % | No decrease in host % |
| Nasal | No increase in final reads | No decrease in host % | NA | High chance of failure in library prep | NA |
| Sputum | No increase in final reads | No decrease in host % | NA | NA | No decrease in host % |
# Summary table of the community changes observed for each depletion method
# (columns) in each sample type (rows). NA means no change was noted.
data.frame(
  lyPMA = c(NA, "Beta changed", "Shannon +"),
  Benzonase = c(NA, NA, "Richness + InvSimp +"),
  `Host zero` = c(NA, "Richness + InvSimp + ", NA),
  Molysis = c(NA, "Richness + InvSimp +", "Beta changed"),
  QIAamp = c("Beta changed", NA, "Beta changed"),
  row.names = c("BAL", "Nasal", "Sputum"),
  # Keep "Host zero" as a column name with a space.
  check.names = FALSE
) %>%
  kbl(format = "html", caption = "Table of community changes induced by each treatment method") %>%
  kable_styling(full_width = FALSE, html_font = "serif")
| | lyPMA | Benzonase | Host zero | Molysis | QIAamp |
|---|---|---|---|---|---|
| BAL | NA | NA | NA | NA | Beta changed |
| Nasal | Beta changed | NA | Richness + InvSimp + | Richness + InvSimp + | NA |
| Sputum | Shannon + | Richness + InvSimp + | NA | Beta changed | Beta changed |
Some methods were successful in increasing final reads and lowering host DNA%.
We do not know whether some of the changes in diversity are due to deeper sequencing or to contaminants.
Further analyses of individual taxa are required.
A5. DA analysis for taxa, by sample type and treatment
Hypothesis: if a taxon is a contaminant induced by a treatment method, its DA analysis result should be associated with treatment covariate.
Both stratified and nonstratified were conducted.
Looked at other level groups as well - family and genus
Without interaction
feature ~ log10(final reads) + sample type + lyPMA + Benzonase + Host zero + Molysis + QIAamp + (1|subject)
With interaction
feature ~ log10(final reads) + sample type + treatment + sample type * treatment + (1|subject)
Stratified
feature ~ log10(final reads) + lyPMA + Benzonase + Host zero + Molysis + QIAamp + (1|subject)
MaAsLin settings : log transform, total sum scaling normalization
Results
# DA analysis - MaAsLin
# Store the log10-transformed final read count as its own metadata column.
sample_data(phyloseq_rel_nz)$log10.Final_reads <- log10(sample_data(phyloseq_rel_nz)$Final_reads)
# Running MaAsLin for all samples without decontam.
# For taxa differentially abundant by host depletion method, look to see which
# ones overlap with potential contaminant taxa.
# Maaslin - # # y ~ log(final reads) + sample_type + treatment -----------
# all samples
# Previously saved result tables, one per sample type.
fit_data_spt_neg <- read.csv("data/fit_data_spt_neg.csv")
fit_data_bal_neg <- read.csv("data/fit_data_bal_neg.csv")
# The nasal table was previously read from the sputum file (copy-paste slip).
# NOTE(review): assumes the nasal results were saved as
# data/fit_data_ns_neg.csv, following the naming of the two files above.
fit_data_ns_neg <- read.csv("data/fit_data_ns_neg.csv")
MaAslin - volcano plot
Without interaction
feature ~ log10(final reads) + sample type + lyPMA + Benzonase + Host zero + Molysis + QIAamp + (1|subject)
Most samples are differentially abundant by sample type
# Making significance table for figure
# Format taxon names stored as row names into markdown so that the Latin name
# is rendered in italics.
#
# data:    a data frame or matrix whose row names are taxon names with "_" as
#          the word separator (square brackets are dropped).
# returns: `data` with reformatted row names. Names holding the standalone
#          abbreviation "sp" or the word "group" are italicized only up to that
#          word ("*Genus* sp. ..."); every other name is italicized entirely
#          ("*Genus species*").
species_italic <- function(data) {
  labels <- gsub("_", " ", rownames(data))
  labels <- gsub("[]]|[[]", "", labels)
  # Close the italics before a standalone "sp" / "sp." only. The lookahead
  # stops epithets that merely start with "sp" (e.g. "sputigena") from being
  # split, and the literal "\\." avoids matching an arbitrary character.
  labels <- gsub(" sp\\.?(?= |$)", "* sp.", labels, perl = TRUE)
  labels <- gsub(" group", "* group.", labels)
  # Names already holding a closing "*" only need the opening one.
  labels <- ifelse(
    grepl("[*]", labels),
    paste0("*", labels),
    paste0("*", labels, "*")
  )
  rownames(data) <- labels
  data
}
# Make a significance table for each figure (top `n_top` taxa).
#
# data:    MaAsLin result table with the columns `feature`, `metadata` and
#          `qval`; the metadata values must include lypma, benzonase,
#          host_zero, molysis and qiaamp.
# n_top:   number of features to keep, ranked by their smallest q-value over
#          all metadata terms (default 20).
# returns: a list with
#          data        - one row per feature, q-values of the five methods
#          data_italic - the same values with markdown-italic feature names as
#                        row names and display names as column names
#          data_sig    - "*" where the q-value is below 0.1, NA elsewhere
make_sig_table <- function(data, n_top = 20) {
  sig_data <- spread(data[order(data$qval), c("feature", "metadata", "qval")], metadata, qval)
  # Row-wise minimum over the numeric q-value columns only. apply() on the
  # whole data frame would coerce everything to character and compare the
  # q-values (and the feature name) as strings.
  qval_cols <- setdiff(names(sig_data), "feature")
  sig_data$min <- do.call(pmin, c(unname(as.list(sig_data[qval_cols])), na.rm = TRUE))
  # head() returns fewer rows when fewer than n_top features exist instead of
  # padding the table with NA rows.
  sig_data <- sig_data[order(sig_data$min), ] %>%
    select("feature", "lypma", "benzonase", "host_zero", "molysis", "qiaamp") %>%
    head(n_top)
  # Restore the bracketed name for this one feature.
  sig_data[["feature"]] <- ifelse(
    sig_data[["feature"]] == "X.Collinsella._massiliensis",
    "[Collinsella]_massiliensis",
    sig_data[["feature"]]
  )
  # The existing row names are parked in a throw-away "-" column before
  # `feature` is turned into the row names.
  sig_data_italic <- sig_data %>%
    rownames_to_column(var = "-") %>%
    column_to_rownames(var = "feature") %>%
    species_italic() %>%
    select(-c("-")) %>%
    rename(lyPMA = lypma, Benzonase = benzonase, `Host zero` = host_zero, Molysis = molysis, QIAamp = qiaamp)
  sig_data_sig <- ifelse(sig_data_italic < 0.1, "*", NA) %>% data.frame(check.names = FALSE)
  list(data = sig_data, data_italic = sig_data_italic, data_sig = sig_data_sig)
}

# Same table for the negative controls, limited to the top 16 features.
# The table is built from the `data` argument rather than from the global
# `fit_data_neg`.
make_sig_table_neg <- function(data) {
  make_sig_table(data, n_top = 16)
}
# Replace each MaAsLin result table by its significance-table list
# (elements data, data_italic, data_sig).
fit_data_neg <- make_sig_table_neg(fit_data_neg)
fit_data_pos <- make_sig_table(fit_data_pos)
fit_data_bal <- make_sig_table(fit_data_bal)
fit_data_ns <- make_sig_table(fit_data_ns)
fit_data_spt <- make_sig_table(fit_data_spt)
fit_data_bal_neg <- make_sig_table(fit_data_bal_neg)
fit_data_ns_neg <- make_sig_table(fit_data_ns_neg)
fit_data_spt_neg <- make_sig_table(fit_data_spt_neg)

# Mean value of every numeric column per treatment for the taxa kept in
# `physeq_sig`, returned with one row per feature of `italic_table` (same row
# order), zeros replaced by NA and the feature names in a `feature` column.
# Only the first 21 summarised columns (treatment + 20 others) are carried
# forward before the rows are matched against `italic_table`.
mean_rel_by_treatment <- function(physeq_sig, italic_table) {
  cbind(physeq_sig %>% otu_table %>% t, physeq_sig %>% sample_data) %>%
    group_by(treatment) %>%
    summarise_if(is.numeric, mean, na.rm = TRUE) %>%
    .[, 1:21] %>%
    column_to_rownames(., "treatment") %>%
    t() %>%
    species_italic() %>%
    data.frame(check.names = FALSE) %>%
    .[row.names(italic_table), ] %>%
    mutate_all(~na_if(., 0)) %>%
    rownames_to_column("feature")
}

# The subset_samples()/subset_taxa() calls stay at top level because their
# filter expressions refer to global objects.
neg_sig <- subset_taxa(subset_samples(phyloseq_rel_nz, sample_type == "Neg."),
  taxa_names(subset_samples(phyloseq_rel_nz, sample_type == "Neg.")) %in% fit_data_neg$data$feature)
fit_data_neg$rel <- mean_rel_by_treatment(neg_sig, fit_data_neg$data_italic)

pos_sig <- subset_taxa(subset_samples(phyloseq_rel_nz, sample_type == "Mock"),
  taxa_names(subset_samples(phyloseq_rel_nz, sample_type == "Mock")) %in% fit_data_pos$data$feature)
fit_data_pos$rel <- mean_rel_by_treatment(pos_sig, fit_data_pos$data_italic)

spt_sig <- subset_taxa(subset_samples(phyloseq_rel_nz, sample_type == "Sputum"),
  taxa_names(subset_samples(phyloseq_rel_nz, sample_type == "Sputum")) %in% fit_data_spt$data$feature)
fit_data_spt$rel <- mean_rel_by_treatment(spt_sig, fit_data_spt$data_italic)

spt_neg_sig <- subset_taxa(subset_samples(phyloseq_rel_nz, sample_type == "Sputum"),
  taxa_names(subset_samples(phyloseq_rel_nz, sample_type == "Sputum")) %in% fit_data_spt_neg$data$feature)
fit_data_spt_neg$rel <- mean_rel_by_treatment(spt_neg_sig, fit_data_spt_neg$data_italic)

ns_sig <- subset_taxa(subset_samples(phyloseq_rel_nz, sample_type == "Nasal"),
  taxa_names(subset_samples(phyloseq_rel_nz, sample_type == "Nasal")) %in% fit_data_ns$data$feature)
fit_data_ns$rel <- mean_rel_by_treatment(ns_sig, fit_data_ns$data_italic)

# Nasal taxa are selected with the nasal feature list; this previously used
# the sputum list (fit_data_spt_neg).
ns_neg_sig <- subset_taxa(subset_samples(phyloseq_rel_nz, sample_type == "Nasal"),
  taxa_names(subset_samples(phyloseq_rel_nz, sample_type == "Nasal")) %in% fit_data_ns_neg$data$feature)
fit_data_ns_neg$rel <- mean_rel_by_treatment(ns_neg_sig, fit_data_ns_neg$data_italic)

bal_sig <- subset_taxa(subset_samples(phyloseq_rel_nz, sample_type == "BAL"),
  taxa_names(subset_samples(phyloseq_rel_nz, sample_type == "BAL")) %in% fit_data_bal$data$feature)
fit_data_bal$rel <- mean_rel_by_treatment(bal_sig, fit_data_bal$data_italic)

bal_neg_sig <- subset_taxa(subset_samples(phyloseq_rel_nz, sample_type == "BAL"),
  taxa_names(subset_samples(phyloseq_rel_nz, sample_type == "BAL")) %in% fit_data_bal_neg$data$feature)
fit_data_bal_neg$rel <- mean_rel_by_treatment(bal_neg_sig, fit_data_bal_neg$data_italic)
#Volcano plot
# One point per row of `maaslin_all`: x = MaAsLin coefficient,
# y = -log10(q-value), colour = the covariate (`metadata`) of the association.
# Grey guide lines mark q = 0.1 (y = 1) and a zero coefficient (x = 0).
ggplot(maaslin_all, aes(x = coef, y = -log10(qval), col = metadata)) +
  theme_classic(base_family = "serif") +
  labs(tag = "A") +
  geom_point(size = 2) +
  xlab("MaAslin coefficient") +
  ylab("-log<sub>10</sub>(*q*-value)") +
  geom_hline(yintercept = 1, col = "gray") +
  geom_vline(xintercept = 0, col = "gray") +
  # A single annotation: mapping the constant label inside aes() of a geom
  # would draw one copy of the label for every row of the data.
  annotate("richtext", x = 4, y = 8,
           label = "*q*-value = 0.1, fold-change = 0",
           vjust = -1, fontface = 1,
           colour = "grey", size = 3, family = "serif") +
  # axis title is markdown so the <sub> and *q* markup render
  theme(legend.position = "top", axis.title.y = ggtext::element_markdown()) +
  #color using https://colorbrewer2.org/#type=qualitative&scheme=Set1&n=6
  scale_color_manual(values = c("#8c510a", "#c51b7d", "#fb9a99", "#33a02c", "#b2df8a", "#1f78b4", "#a6cee3"),
                     breaks = c("log10.Final_reads", "sample_type", "lypma", "benzonase", "host_zero", "molysis", "qiaamp"),
                     labels = c("log10 (Final reads)", "Sample type", "lyPMA", "Benzonase", "Host zero", "Molysis", "QIAamp")) +
  guides(col = guide_legend(title = "Covariates", title.position = "top", nrow = 2))
MaAslin - table
feature ~ log10(final reads) + sample type + lyPMA + Benzonase + Host zero + Molysis + QIAamp + (1|subject)
Some taxa were changed due to treatment
Stratified analysis is required.
Table of associations had significant (q < 0.1) result
# Count significant (q < 0.1) associations per covariate.
cat("all association")
## all association
subset(maaslin_all, qval < 0.1) %>%
  arrange(feature) %>%
  pull(metadata) %>%
  table()
## .
## benzonase host_zero log10.Final_reads lypma
## 27 15 100 25
## molysis qiaamp sample_type
## 31 15 122
# Same count, restricted to positive coefficients.
cat("Positive association (increased taxa)")
## Positive association (increased taxa)
subset(maaslin_all, qval < 0.1 & coef > 0) %>%
  arrange(feature) %>%
  pull(metadata) %>%
  table()
## .
## benzonase host_zero log10.Final_reads lypma
## 19 7 99 24
## molysis qiaamp sample_type
## 23 7 111
MaAsLin cannot test the global significance of a multi-level covariate.
(https://forum.biobakery.org/t/global-significance-test-for-multilevel-factor/3061)
Balloon plot - Neg.
No taxa changed after treatment
# Balloon plot for fit_data_neg.
# The long-format abundance (`value`), q-value (`qval`) and significance label
# (`sig`) tables are joined on feature x treatment; all = TRUE keeps rows that
# appear in only one of them.
fit_data_neg$rel %>%
  gather(treatment, value, Untreated:QIAamp, factor_key = TRUE) %>%
  merge(
    fit_data_neg$data_italic %>%
      rownames_to_column("feature") %>%
      gather(treatment, qval, lyPMA:QIAamp, factor_key = TRUE),
    by = c("feature", "treatment"),
    all = TRUE
  ) %>%
  merge(
    fit_data_neg$data_sig %>%
      rownames_to_column("feature") %>%
      gather(treatment, sig, lyPMA:QIAamp, factor_key = TRUE),
    by = c("feature", "treatment"),
    all = TRUE
  ) %>%
  # bubble size = abundance, bubble fill = q-value
  ggballoonplot(x = "treatment", y = "feature", size = "value", fill = "qval") +
  theme_classic(base_family = "serif") +
  # colour ramp for the q-values
  gradient_fill(c("#006d2c", "#edf8fb")) +
  labs(x = "Experimental group", y = "Species", tag = "D") +
  theme(
    legend.position = "top",
    panel.grid.major = element_line(colour = "grey"),
    axis.text.x = element_text(hjust = 0.5, vjust = 0.5),
    # markdown text so the taxa names can be italicised
    axis.text.y = ggtext::element_markdown()
  ) +
  # significance labels; the constant colour mapping supplies the legend key
  geom_text(
    aes(x = treatment, y = feature, label = sig, colour = "red"),
    size = 5, hjust = -2, vjust = 0.8
  ) +
  guides(
    size = guide_legend(title = "Relative abundance",
                        title.position = "top", nrow = 2, order = 1),
    fill = guide_colorbar(title = expression(paste(italic("q"), "-value", sep = "")),
                          title.position = "top", order = 2),
    colour = guide_legend(title = "Significance",
                          title.position = "top", nrow = 1, order = 3,
                          override.aes = aes(label = "*", size = 10, color = "red"))
  ) +
  scale_x_discrete(labels = c("control" = "Untreated",
                              "lypma" = "lyPMA",
                              "benzonase" = "Benzonase",
                              "host_zero" = "Host-zero",
                              "molysis" = "Molysis",
                              "qiaamp" = "QIAamp")) +
  scale_colour_manual(values = "red",
                      labels = expression(paste(italic("q"), "-value < 0.1", sep = "")))
Balloon plot - Pos.
Taxa decreased after treatment
# Balloon plot for fit_data_pos.
# The long-format abundance (`value`), q-value (`qval`) and significance label
# (`sig`) tables are joined on feature x treatment; all = TRUE keeps rows that
# appear in only one of them.
fit_data_pos$rel %>%
  gather(treatment, value, Untreated:QIAamp, factor_key = TRUE) %>%
  merge(
    fit_data_pos$data_italic %>%
      rownames_to_column("feature") %>%
      gather(treatment, qval, lyPMA:QIAamp, factor_key = TRUE),
    by = c("feature", "treatment"),
    all = TRUE
  ) %>%
  merge(
    fit_data_pos$data_sig %>%
      rownames_to_column("feature") %>%
      gather(treatment, sig, lyPMA:QIAamp, factor_key = TRUE),
    by = c("feature", "treatment"),
    all = TRUE
  ) %>%
  # bubble size = abundance, bubble fill = q-value
  ggballoonplot(x = "treatment", y = "feature", size = "value", fill = "qval") +
  theme_classic(base_family = "serif") +
  # colour ramp for the q-values
  gradient_fill(c("#006d2c", "#edf8fb")) +
  labs(x = "Experimental group", y = "Species", tag = "D") +
  theme(
    legend.position = "top",
    panel.grid.major = element_line(colour = "grey"),
    axis.text.x = element_text(hjust = 0.5, vjust = 0.5),
    # markdown text so the taxa names can be italicised
    axis.text.y = ggtext::element_markdown()
  ) +
  # significance labels; the constant colour mapping supplies the legend key
  geom_text(
    aes(x = treatment, y = feature, label = sig, colour = "red"),
    size = 5, hjust = -2, vjust = 0.8
  ) +
  guides(
    size = guide_legend(title = "Relative abundance",
                        title.position = "top", nrow = 2, order = 1),
    fill = guide_colorbar(title = expression(paste(italic("q"), "-value", sep = "")),
                          title.position = "top", order = 2),
    colour = guide_legend(title = "Significance",
                          title.position = "top", nrow = 1, order = 3,
                          override.aes = aes(label = "*", size = 10, color = "red"))
  ) +
  scale_x_discrete(labels = c("control" = "Untreated",
                              "lypma" = "lyPMA",
                              "benzonase" = "Benzonase",
                              "host_zero" = "Host-zero",
                              "molysis" = "Molysis",
                              "qiaamp" = "QIAamp")) +
  scale_colour_manual(values = "red",
                      labels = expression(paste(italic("q"), "-value < 0.1", sep = "")))
Balloon plot - BAL
Mean relative abundances of top 20 taxa had low q-values.
No taxa changed after treatment
# Balloon plot for fit_data_bal.
# The long-format abundance (`value`), q-value (`qval`) and significance label
# (`sig`) tables are joined on feature x treatment; all = TRUE keeps rows that
# appear in only one of them.
fit_data_bal$rel %>%
  gather(treatment, value, Untreated:QIAamp, factor_key = TRUE) %>%
  merge(
    fit_data_bal$data_italic %>%
      rownames_to_column("feature") %>%
      gather(treatment, qval, lyPMA:QIAamp, factor_key = TRUE),
    by = c("feature", "treatment"),
    all = TRUE
  ) %>%
  merge(
    fit_data_bal$data_sig %>%
      rownames_to_column("feature") %>%
      gather(treatment, sig, lyPMA:QIAamp, factor_key = TRUE),
    by = c("feature", "treatment"),
    all = TRUE
  ) %>%
  # bubble size = abundance, bubble fill = q-value
  ggballoonplot(x = "treatment", y = "feature", size = "value", fill = "qval") +
  theme_classic(base_family = "serif") +
  # colour ramp for the q-values
  gradient_fill(c("#006d2c", "#edf8fb")) +
  labs(x = "Experimental group", y = "Species", tag = "D") +
  theme(
    legend.position = "top",
    panel.grid.major = element_line(colour = "grey"),
    axis.text.x = element_text(hjust = 0.5, vjust = 0.5),
    # markdown text so the taxa names can be italicised
    axis.text.y = ggtext::element_markdown()
  ) +
  # significance labels; the constant colour mapping supplies the legend key
  geom_text(
    aes(x = treatment, y = feature, label = sig, colour = "red"),
    size = 5, hjust = -2, vjust = 0.8
  ) +
  guides(
    size = guide_legend(title = "Relative abundance",
                        title.position = "top", nrow = 2, order = 1),
    fill = guide_colorbar(title = expression(paste(italic("q"), "-value", sep = "")),
                          title.position = "top", order = 2),
    colour = guide_legend(title = "Significance",
                          title.position = "top", nrow = 1, order = 3,
                          override.aes = aes(label = "*", size = 10, color = "red"))
  ) +
  scale_x_discrete(labels = c("control" = "Untreated",
                              "lypma" = "lyPMA",
                              "benzonase" = "Benzonase",
                              "host_zero" = "Host-zero",
                              "molysis" = "Molysis",
                              "qiaamp" = "QIAamp")) +
  scale_colour_manual(values = "red",
                      labels = expression(paste(italic("q"), "-value < 0.1", sep = "")))
# Balloon plot for fit_data_bal_neg.
# The long-format abundance (`value`), q-value (`qval`) and significance label
# (`sig`) tables are joined on feature x treatment; all = TRUE keeps rows that
# appear in only one of them.
fit_data_bal_neg$rel %>%
  gather(treatment, value, Untreated:QIAamp, factor_key = TRUE) %>%
  merge(
    fit_data_bal_neg$data_italic %>%
      rownames_to_column("feature") %>%
      gather(treatment, qval, lyPMA:QIAamp, factor_key = TRUE),
    by = c("feature", "treatment"),
    all = TRUE
  ) %>%
  merge(
    fit_data_bal_neg$data_sig %>%
      rownames_to_column("feature") %>%
      gather(treatment, sig, lyPMA:QIAamp, factor_key = TRUE),
    by = c("feature", "treatment"),
    all = TRUE
  ) %>%
  # bubble size = abundance, bubble fill = q-value
  ggballoonplot(x = "treatment", y = "feature", size = "value", fill = "qval") +
  theme_classic(base_family = "serif") +
  # colour ramp for the q-values
  gradient_fill(c("#006d2c", "#edf8fb")) +
  labs(x = "Experimental group", y = "Species", tag = "D") +
  theme(
    legend.position = "top",
    panel.grid.major = element_line(colour = "grey"),
    axis.text.x = element_text(hjust = 0.5, vjust = 0.5),
    # markdown text so the taxa names can be italicised
    axis.text.y = ggtext::element_markdown()
  ) +
  # significance labels; the constant colour mapping supplies the legend key
  geom_text(
    aes(x = treatment, y = feature, label = sig, colour = "red"),
    size = 5, hjust = -2, vjust = 0.8
  ) +
  guides(
    size = guide_legend(title = "Relative abundance",
                        title.position = "top", nrow = 2, order = 1),
    fill = guide_colorbar(title = expression(paste(italic("q"), "-value", sep = "")),
                          title.position = "top", order = 2),
    colour = guide_legend(title = "Significance",
                          title.position = "top", nrow = 1, order = 3,
                          override.aes = aes(label = "*", size = 10, color = "red"))
  ) +
  scale_x_discrete(labels = c("control" = "Untreated",
                              "lypma" = "lyPMA",
                              "benzonase" = "Benzonase",
                              "host_zero" = "Host-zero",
                              "molysis" = "Molysis",
                              "qiaamp" = "QIAamp")) +
  scale_colour_manual(values = "red",
                      labels = expression(paste(italic("q"), "-value < 0.1", sep = "")))
Balloon plot - Nasals
Mean relative abundances of top 20 taxa had low q-values.
Some taxa changed after treatment, but nothing was unique
# Balloon plot for fit_data_ns.
# The long-format abundance (`value`), q-value (`qval`) and significance label
# (`sig`) tables are joined on feature x treatment; all = TRUE keeps rows that
# appear in only one of them.
fit_data_ns$rel %>%
  gather(treatment, value, Untreated:QIAamp, factor_key = TRUE) %>%
  merge(
    fit_data_ns$data_italic %>%
      rownames_to_column("feature") %>%
      gather(treatment, qval, lyPMA:QIAamp, factor_key = TRUE),
    by = c("feature", "treatment"),
    all = TRUE
  ) %>%
  merge(
    fit_data_ns$data_sig %>%
      rownames_to_column("feature") %>%
      gather(treatment, sig, lyPMA:QIAamp, factor_key = TRUE),
    by = c("feature", "treatment"),
    all = TRUE
  ) %>%
  # bubble size = abundance, bubble fill = q-value
  ggballoonplot(x = "treatment", y = "feature", size = "value", fill = "qval") +
  theme_classic(base_family = "serif") +
  # colour ramp for the q-values
  gradient_fill(c("#006d2c", "#edf8fb")) +
  labs(x = "Experimental group", y = "Species", tag = "D") +
  theme(
    legend.position = "top",
    panel.grid.major = element_line(colour = "grey"),
    axis.text.x = element_text(hjust = 0.5, vjust = 0.5),
    # markdown text so the taxa names can be italicised
    axis.text.y = ggtext::element_markdown()
  ) +
  # significance labels; the constant colour mapping supplies the legend key
  geom_text(
    aes(x = treatment, y = feature, label = sig, colour = "red"),
    size = 5, hjust = -2, vjust = 0.8
  ) +
  guides(
    size = guide_legend(title = "Relative abundance",
                        title.position = "top", nrow = 2, order = 1),
    fill = guide_colorbar(title = expression(paste(italic("q"), "-value", sep = "")),
                          title.position = "top", order = 2),
    colour = guide_legend(title = "Significance",
                          title.position = "top", nrow = 1, order = 3,
                          override.aes = aes(label = "*", size = 10, color = "red"))
  ) +
  scale_x_discrete(labels = c("control" = "Untreated",
                              "lypma" = "lyPMA",
                              "benzonase" = "Benzonase",
                              "host_zero" = "Host-zero",
                              "molysis" = "Molysis",
                              "qiaamp" = "QIAamp")) +
  scale_colour_manual(values = "red",
                      labels = expression(paste(italic("q"), "-value < 0.1", sep = "")))
#Data with negative
# Balloon plot for fit_data_ns_neg.
# The long-format abundance (`value`), q-value (`qval`) and significance label
# (`sig`) tables are joined on feature x treatment; all = TRUE keeps rows that
# appear in only one of them.
fit_data_ns_neg$rel %>%
  gather(treatment, value, Untreated:QIAamp, factor_key = TRUE) %>%
  merge(
    fit_data_ns_neg$data_italic %>%
      rownames_to_column("feature") %>%
      gather(treatment, qval, lyPMA:QIAamp, factor_key = TRUE),
    by = c("feature", "treatment"),
    all = TRUE
  ) %>%
  merge(
    fit_data_ns_neg$data_sig %>%
      rownames_to_column("feature") %>%
      gather(treatment, sig, lyPMA:QIAamp, factor_key = TRUE),
    by = c("feature", "treatment"),
    all = TRUE
  ) %>%
  # bubble size = abundance, bubble fill = q-value
  ggballoonplot(x = "treatment", y = "feature", size = "value", fill = "qval") +
  theme_classic(base_family = "serif") +
  # colour ramp for the q-values
  gradient_fill(c("#006d2c", "#edf8fb")) +
  labs(x = "Experimental group", y = "Species", tag = "D") +
  theme(
    legend.position = "top",
    panel.grid.major = element_line(colour = "grey"),
    axis.text.x = element_text(hjust = 0.5, vjust = 0.5),
    # markdown text so the taxa names can be italicised
    axis.text.y = ggtext::element_markdown()
  ) +
  # significance labels; the constant colour mapping supplies the legend key
  geom_text(
    aes(x = treatment, y = feature, label = sig, colour = "red"),
    size = 5, hjust = -2, vjust = 0.8
  ) +
  guides(
    size = guide_legend(title = "Relative abundance",
                        title.position = "top", nrow = 2, order = 1),
    fill = guide_colorbar(title = expression(paste(italic("q"), "-value", sep = "")),
                          title.position = "top", order = 2),
    colour = guide_legend(title = "Significance",
                          title.position = "top", nrow = 1, order = 3,
                          override.aes = aes(label = "*", size = 10, color = "red"))
  ) +
  scale_x_discrete(labels = c("control" = "Untreated",
                              "lypma" = "lyPMA",
                              "benzonase" = "Benzonase",
                              "host_zero" = "Host-zero",
                              "molysis" = "Molysis",
                              "qiaamp" = "QIAamp")) +
  scale_colour_manual(values = "red",
                      labels = expression(paste(italic("q"), "-value < 0.1", sep = "")))
Balloon plot - Sputum
Mean relative abundances of top 20 taxa had low q-values.
Some taxa changed after treatment, but nothing was unique
# Balloon plot for fit_data_spt.
# The long-format abundance (`value`), q-value (`qval`) and significance label
# (`sig`) tables are joined on feature x treatment; all = TRUE keeps rows that
# appear in only one of them.
fit_data_spt$rel %>%
  gather(treatment, value, Untreated:QIAamp, factor_key = TRUE) %>%
  merge(
    fit_data_spt$data_italic %>%
      rownames_to_column("feature") %>%
      gather(treatment, qval, lyPMA:QIAamp, factor_key = TRUE),
    by = c("feature", "treatment"),
    all = TRUE
  ) %>%
  merge(
    fit_data_spt$data_sig %>%
      rownames_to_column("feature") %>%
      gather(treatment, sig, lyPMA:QIAamp, factor_key = TRUE),
    by = c("feature", "treatment"),
    all = TRUE
  ) %>%
  # bubble size = abundance, bubble fill = q-value
  ggballoonplot(x = "treatment", y = "feature", size = "value", fill = "qval") +
  theme_classic(base_family = "serif") +
  # colour ramp for the q-values
  gradient_fill(c("#006d2c", "#edf8fb")) +
  labs(x = "Experimental group", y = "Species", tag = "D") +
  theme(
    legend.position = "top",
    panel.grid.major = element_line(colour = "grey"),
    axis.text.x = element_text(hjust = 0.5, vjust = 0.5),
    # markdown text so the taxa names can be italicised
    axis.text.y = ggtext::element_markdown()
  ) +
  # significance labels; the constant colour mapping supplies the legend key
  geom_text(
    aes(x = treatment, y = feature, label = sig, colour = "red"),
    size = 5, hjust = -2, vjust = 0.8
  ) +
  guides(
    size = guide_legend(title = "Relative abundance",
                        title.position = "top", nrow = 2, order = 1),
    fill = guide_colorbar(title = expression(paste(italic("q"), "-value", sep = "")),
                          title.position = "top", order = 2),
    colour = guide_legend(title = "Significance",
                          title.position = "top", nrow = 1, order = 3,
                          override.aes = aes(label = "*", size = 10, color = "red"))
  ) +
  scale_x_discrete(labels = c("control" = "Untreated",
                              "lypma" = "lyPMA",
                              "benzonase" = "Benzonase",
                              "host_zero" = "Host-zero",
                              "molysis" = "Molysis",
                              "qiaamp" = "QIAamp")) +
  scale_colour_manual(values = "red",
                      labels = expression(paste(italic("q"), "-value < 0.1", sep = "")))
# Balloon plot for fit_data_spt_neg.
# The long-format abundance (`value`), q-value (`qval`) and significance label
# (`sig`) tables are joined on feature x treatment; all = TRUE keeps rows that
# appear in only one of them.
fit_data_spt_neg$rel %>%
  gather(treatment, value, Untreated:QIAamp, factor_key = TRUE) %>%
  merge(
    fit_data_spt_neg$data_italic %>%
      rownames_to_column("feature") %>%
      gather(treatment, qval, lyPMA:QIAamp, factor_key = TRUE),
    by = c("feature", "treatment"),
    all = TRUE
  ) %>%
  merge(
    fit_data_spt_neg$data_sig %>%
      rownames_to_column("feature") %>%
      gather(treatment, sig, lyPMA:QIAamp, factor_key = TRUE),
    by = c("feature", "treatment"),
    all = TRUE
  ) %>%
  # bubble size = abundance, bubble fill = q-value
  ggballoonplot(x = "treatment", y = "feature", size = "value", fill = "qval") +
  theme_classic(base_family = "serif") +
  # colour ramp for the q-values
  gradient_fill(c("#006d2c", "#edf8fb")) +
  labs(x = "Experimental group", y = "Species", tag = "D") +
  theme(
    legend.position = "top",
    panel.grid.major = element_line(colour = "grey"),
    axis.text.x = element_text(hjust = 0.5, vjust = 0.5),
    # markdown text so the taxa names can be italicised
    axis.text.y = ggtext::element_markdown()
  ) +
  # significance labels; the constant colour mapping supplies the legend key
  geom_text(
    aes(x = treatment, y = feature, label = sig, colour = "red"),
    size = 5, hjust = -2, vjust = 0.8
  ) +
  guides(
    size = guide_legend(title = "Relative abundance",
                        title.position = "top", nrow = 2, order = 1),
    fill = guide_colorbar(title = expression(paste(italic("q"), "-value", sep = "")),
                          title.position = "top", order = 2),
    colour = guide_legend(title = "Significance",
                          title.position = "top", nrow = 1, order = 3,
                          override.aes = aes(label = "*", size = 10, color = "red"))
  ) +
  scale_x_discrete(labels = c("control" = "Untreated",
                              "lypma" = "lyPMA",
                              "benzonase" = "Benzonase",
                              "host_zero" = "Host-zero",
                              "molysis" = "Molysis",
                              "qiaamp" = "QIAamp")) +
  scale_colour_manual(values = "red",
                      labels = expression(paste(italic("q"), "-value < 0.1", sep = "")))
Results
Some taxa changed significantly after treatment. Among the top 20, no taxon was observed in only one treatment group. As their emergence was consistent across all treatment groups, they were considered endogenous.
MaAslin with interaction
feature ~ log10(Final reads) + treatment + sample type + treatment * sample type + (1|subject)
Some taxa were treatment-specific after adjusting for the sample type * treatment interaction
#Generating interaction term
# The MaAsLin2 interaction fit below is kept commented out; the live code only
# reads a results table from "data/maaslin_interaction.csv".
# NOTE(review): presumably that CSV was exported from the commented-out run -
# confirm before relying on it.
# NOTE(review): as written, `capture.output(maaslin_interaction = Maaslin2(...))`
# passes the fit as a named argument instead of assigning it; fix before
# re-enabling.
#sample_data(phyloseq_rel_nz)$sampletype_treatment <- paste(sample_data(phyloseq_rel_nz)$sample_type, #sample_data(phyloseq_rel_nz)$treatment, sep = "*")
#capture.output(maaslin_interaction = Maaslin2(input_data = otu_table(phyloseq_rel_nz) %>% t %>% data.frame(),
# input_metadata = phyloseq_rel_nz %>% sample_data %>% data.frame(check.names = F),
# output = "data",
# fixed_effects = c("sample_type", "log10.Final_reads", "treatment", "sampletype_treatment"),
# transform = "LOG", #default
# normalization = "TSS",
# random_effects = c("subject_id"),
# reference = c("sample_type,BAL", "treatment,Untreated", "sampletype_treatment,BAL*Untreated"),
# plot_heatmap = F,
# plot_scatter = F))
# Load the stored association table (path is relative to the working directory).
maaslin_interaction <- read.csv("data/maaslin_interaction.csv")
#interaction term - ggplot
# Volcano plot of `maaslin_interaction`: x = MaAsLin coefficient,
# y = -log10(q-value), colour = the fixed effect (`metadata`) of each row.
# Grey guide lines mark q = 0.1 (y = 1) and a zero coefficient (x = 0).
ggplot(maaslin_interaction, aes(x = coef, y = -log10(qval), col = metadata)) +
  theme_classic(base_family = "serif") +
  #labs(tag = "A") +
  ggtitle("MaAslin with interaction term") +
  geom_point(size = 2) +
  xlab("MaAslin coefficient") +
  ylab("-log<sub>10</sub>(*q*-value)") +
  geom_hline(yintercept = 1, col = "gray") +
  geom_vline(xintercept = 0, col = "gray") +
  # A single annotation: mapping the constant label inside aes() of a geom
  # would draw one copy of the label for every row of the data.
  annotate("richtext", x = 4, y = 8,
           label = "*q*-value = 0.1, fold-change = 0",
           vjust = -1, fontface = 1,
           colour = "grey", size = 3, family = "serif") +
  # axis title is markdown so the <sub> and *q* markup render
  theme(legend.position = "top", axis.title.y = ggtext::element_markdown()) +
  scale_color_manual(values = c("#e41a1c", "#377eb8", "#4daf4a", "#984ea3")) +
  guides(col = guide_legend(title = "Fixed effects", title.position = "top", nrow = 1))
# Count the significant (q < 0.1) rows of the interaction model per fixed effect.
cat("Number of differentially abundant bugs by each metadata")
## Number of differentially abundant bugs by each metadata
subset(maaslin_interaction, qval < 0.1) %>%
  pull(metadata) %>%
  table()
## .
## log10.Final_reads sample_type sampletype_treatment
## 38 146 560
## treatment
## 135
MaAsLin interaction analysis
Hypothesis: if a sample is contaminated by some treatment, a change of taxon is likely to be associated with one treatment method
No taxa increased only because of one treatment method
cat("Some taxa were increased by each treatmment.\n But they are not contaminants, \nif they are present in most of the treatments")
## Some taxa were increased by each treatmment.
## But they are not contaminants,
## if they are present in most of the treatments
# For each feature, count its significant (q < 0.1) `treatment` rows and list
# the features from most to least frequent as an HTML table.
subset(maaslin_interaction, qval < 0.1 & metadata == "treatment") %>%
  pull(feature) %>%
  table() %>%
  data.frame() %>%
  arrange(desc(Freq)) %>%
  rename(Feature = ".") %>%
  kbl(format = "html",
      caption = "Table of taxa differentially abundant by treatment") %>%
  kable_styling(full_width = FALSE, html_font = "serif")
| Feature | Freq |
|---|---|
| Cryptococcus_gattii_VGI | 5 |
| Cryptococcus_gattii_VGII | 5 |
| Cryptococcus_neoformans | 5 |
| Hydrogenibacillus_schlegelii | 5 |
| Kouleothrix_aurantiaca | 5 |
| Limnochorda_pilosa | 5 |
| Listeria_floridensis | 5 |
| Saccharomyces_cerevisiae | 5 |
| Saccharomyces_cerevisiae_x_Saccharomyces_kudriavzevii | 5 |
| Thermoleophilum_album | 5 |
| Acholeplasma_oculi | 4 |
| Alkalilimnicola_ehrlichii | 4 |
| Bacillus_ginsengihumi | 4 |
| Bacillus_intestinalis | 4 |
| Brochothrix_campestris | 4 |
| Cryptococcus_gattii_VGIII | 4 |
| Cupriavidus_sp | 4 |
| Cutibacterium_acnes | 4 |
| Escherichia_coli | 4 |
| Listeria_innocua | 4 |
| Listeria_monocytogenes | 4 |
| Paludisphaera_borealis | 4 |
| Pseudomonas_aeruginosa_group | 4 |
| Pseudomonas_formosensis | 4 |
| Saccharomyces_kudriavzevii | 4 |
| Salmonella_enterica | 4 |
| Staphylococcus_schweitzeri | 4 |
| Sutterella_parvirubra | 4 |
| Thiohalorhabdus_denitrificans | 4 |
| Staphylococcus_argenteus | 3 |
| Enterococcus_faecalis | 2 |
| Brevundimonas_diminuta | 1 |
| Dolosigranulum_pigrum | 1 |
| Granulicatella_elegans | 1 |
| Microbacterium_laevaniformans | 1 |
cat("Most of taxa were found on most of treatments.")
## Most of taxa were found on most of treatments.
cat("Some taxa were treatment specific, only to one group")
## Some taxa were treatment specific, only to one group
# Features with exactly one significant `treatment` row are selected first;
# every significant (q < 0.1) row of those features is then tabulated.
subset(
  maaslin_interaction,
  feature %in% (
    maaslin_interaction %>%
      subset(., .$qval < 0.1 & .$metadata == "treatment") %>%
      .$feature %>%
      table() %>%
      data.frame() %>%
      subset(., Freq == 1) %>%
      .$. %>%
      as.character()
  )
) %>%
  subset(., .$qval < 0.1) %>%
  select(c("feature", "metadata", "value", "coef", "qval")) %>%
  remove_rownames() %>%
  kbl(format = "html",
      caption = "Table of taxa specific to one treatment group") %>%
  kable_styling(full_width = FALSE, html_font = "serif")
| feature | metadata | value | coef | qval |
|---|---|---|---|---|
| Granulicatella_elegans | sampletype_treatment | Sputum*Control | -8.4411029 | 0.0000000 |
| Microbacterium_laevaniformans | sample_type | Neg. | 9.1206764 | 0.0000000 |
| Granulicatella_elegans | sample_type | Sputum | 8.2066053 | 0.0000000 |
| Brevundimonas_diminuta | treatment | lyPMA | 7.3874257 | 0.0000053 |
| Granulicatella_elegans | sampletype_treatment | Sputum*QIAamp | 4.4759863 | 0.0000075 |
| Granulicatella_elegans | sampletype_treatment | Sputum*Host zero | 3.8472447 | 0.0001604 |
| Granulicatella_elegans | sampletype_treatment | Nasal*QIAamp | 4.1672768 | 0.0001821 |
| Granulicatella_elegans | sampletype_treatment | Nasal*Host zero | 3.3489985 | 0.0011514 |
| Microbacterium_laevaniformans | sampletype_treatment | Neg.*Molysis | 6.8528828 | 0.0014263 |
| Brevundimonas_diminuta | sampletype_treatment | Neg.*Molysis | 7.9717485 | 0.0014673 |
| Granulicatella_elegans | sampletype_treatment | Nasal*lyPMA | 3.4918665 | 0.0015153 |
| Microbacterium_laevaniformans | sampletype_treatment | Neg.*Host zero | 6.6495802 | 0.0019716 |
| Granulicatella_elegans | treatment | Benzonase | 3.1206692 | 0.0028540 |
| Granulicatella_elegans | sampletype_treatment | Neg.*Benzonase | -3.1610441 | 0.0029075 |
| Granulicatella_elegans | sampletype_treatment | BAL*Benzonase | -3.6395276 | 0.0044223 |
| Brevundimonas_diminuta | sampletype_treatment | Neg.*Host zero | 7.1426956 | 0.0049037 |
| Microbacterium_laevaniformans | sampletype_treatment | Neg.*QIAamp | 6.0831853 | 0.0057000 |
| Brevundimonas_diminuta | sampletype_treatment | BAL*lyPMA | -7.1358804 | 0.0059883 |
| Brevundimonas_diminuta | sample_type | Neg. | 4.8402240 | 0.0079955 |
| Brevundimonas_diminuta | sampletype_treatment | Nasal*lyPMA | -6.4798307 | 0.0101716 |
| Granulicatella_elegans | sample_type | Nasal | -3.8626177 | 0.0105339 |
| Granulicatella_elegans | sampletype_treatment | Nasal*Control | 3.2170548 | 0.0106974 |
| Microbacterium_laevaniformans | sampletype_treatment | Neg.*Benzonase | 5.6035401 | 0.0120596 |
| Dolosigranulum_pigrum | sample_type | Nasal | 11.1588968 | 0.0129222 |
| Brevundimonas_diminuta | sampletype_treatment | Sputum*lyPMA | -6.2658174 | 0.0142266 |
| Granulicatella_elegans | log10.Final_reads | log10.Final_reads | 0.5136443 | 0.0373443 |
| Microbacterium_laevaniformans | sampletype_treatment | Nasal*lyPMA | -4.6907843 | 0.0378643 |
| Microbacterium_laevaniformans | sampletype_treatment | Sputum*lyPMA | -4.3907626 | 0.0597485 |
| Dolosigranulum_pigrum | treatment | lyPMA | -2.1094994 | 0.0645820 |
| Granulicatella_elegans | sampletype_treatment | BAL*Molysis | -2.4603992 | 0.0830127 |
| Microbacterium_laevaniformans | treatment | lyPMA | 2.7417300 | 0.0926297 |
No taxon was increased by only one treatment.
A5 Results:
5.1. Both non-stratified and stratified analysis showed that there were no potential contaminants at species level.
5.2. Molysis may have introduced 1 potential contaminant (Streptococcaceae) at the family level
5.3. After adding control data, MaAslin needs to be reanalyzed. Adding controls (mock communities) for each treatment group will show more statistically valid results in y ~ log(final reads) + sample_type + treatment, (re = subject_id))
A6. Decontam - stratified by treatment
input of DNA concentration: 16S qPCR data
https://github.com/benjjneb/decontam/issues/33
Ben Callahan: But in the more limited testing on qPCR data the method still seems to work, and other publications report strong patterns of inverse frequency of contaminants using qPCR data - which is the pattern the frequency method relies on.
Both stratified and nonstratified
Strategy:
2.4.1. run decontam for all samples (common contaminants, by extraction)
2.4.2. stratify decontam analysis per each treatment method (contaminants by depletion methods)
Results
decontam - all sample
Listeria floridensis could be a potential contaminant
# Decontam package --------------------------------------------------------
# Contaminant screen over all treatment methods together.
# Samples whose sample_type contains "Neg" are flagged as negative controls.
sample_data(phyloseq$phyloseq_rel)$is.neg <- grepl(
  "Neg",
  sample_data(phyloseq$phyloseq_rel)$sample_type
)
# Keep only samples with S.obs != 0.
phyloseq_rel_nz <- subset_samples(phyloseq$phyloseq_rel, S.obs != 0)

# All samples: frequency (concentration column "DNA_bac_well"), prevalence
# (negative controls, threshold 0.5) and combined methods.
dec_f_all <- isContaminant(phyloseq_rel_nz,
                           method = "frequency",
                           conc = "DNA_bac_well")
dec_p_all <- isContaminant(phyloseq_rel_nz,
                           method = "prevalence",
                           neg = "is.neg",
                           threshold = 0.5)
dec_c_all <- isContaminant(phyloseq_rel_nz,
                           method = "combined",
                           neg = "is.neg",
                           conc = "DNA_bac_well")

# Show only the rows flagged as contaminants by each method.
cat("decontam frequency - all sample")
## decontam frequency - all sample
subset(dec_f_all, contaminant)
cat("decontam prevalence - all sample")
## decontam prevalence - all sample
subset(dec_p_all, contaminant)
cat("decontam combined - all sample")
## decontam combined - all sample
subset(dec_c_all, contaminant)
decontam - stratified by sample_type
Stratified analysis showed no contaminants in NS and BAL
Sputum may have Corynebacterium pseudodiphtheriticum and Candida albicans as contaminants.
#Stratified by sample type
# Each stratum is one sample type plus the negative controls. Controls are
# selected through the `is.neg` flag (set from grepl("Neg", sample_type)) rather
# than by the literal "Neg": elsewhere in this file the level is written "Neg.",
# which `%in% c(<type>, "Neg")` would not match, leaving the stratum without
# any negative control.
# Only the rows flagged as contaminants are printed.

# Prevalence method (negative controls, threshold 0.5)
cat("decontam prevalence - BAL")
## decontam prevalence - BAL
subset_samples(phyloseq_rel_nz, sample_type == "BAL" | is.neg) %>%
  isContaminant(., method = "prevalence", neg = "is.neg", threshold = 0.5) %>%
  subset(., .$contaminant)
cat("decontam prevalence - Nasal")
## decontam prevalence - Nasal
subset_samples(phyloseq_rel_nz, sample_type == "Nasal" | is.neg) %>%
  isContaminant(., method = "prevalence", neg = "is.neg", threshold = 0.5) %>%
  subset(., .$contaminant)
cat("decontam prevalence - Sputum")
## decontam prevalence - Sputum
subset_samples(phyloseq_rel_nz, sample_type == "Sputum" | is.neg) %>%
  isContaminant(., method = "prevalence", neg = "is.neg", threshold = 0.5) %>%
  subset(., .$contaminant)

# Frequency method (concentration column "DNA_bac_well")
cat("decontam frequency - BAL")
## decontam frequency - BAL
subset_samples(phyloseq_rel_nz, sample_type == "BAL" | is.neg) %>%
  isContaminant(method = "frequency", conc = "DNA_bac_well") %>%
  subset(., .$contaminant)
cat("decontam frequency - Nasal")
## decontam frequency - Nasal
subset_samples(phyloseq_rel_nz, sample_type == "Nasal" | is.neg) %>%
  isContaminant(method = "frequency", conc = "DNA_bac_well") %>%
  subset(., .$contaminant)
cat("decontam frequency - Sputum")
## decontam frequency - Sputum
subset_samples(phyloseq_rel_nz, sample_type == "Sputum" | is.neg) %>%
  isContaminant(method = "frequency", conc = "DNA_bac_well") %>%
  subset(., .$contaminant)

# Combined method
cat("decontam combined - BAL")
## decontam combined - BAL
subset_samples(phyloseq_rel_nz, sample_type == "BAL" | is.neg) %>%
  isContaminant(method = "combined", neg = "is.neg", conc = "DNA_bac_well") %>%
  subset(., .$contaminant)
cat("decontam combined - Nasal")
## decontam combined - Nasal
subset_samples(phyloseq_rel_nz, sample_type == "Nasal" | is.neg) %>%
  isContaminant(method = "combined", neg = "is.neg", conc = "DNA_bac_well") %>%
  subset(., .$contaminant)
cat("decontam combined - Sputum")
## decontam combined - Sputum
subset_samples(phyloseq_rel_nz, sample_type == "Sputum" | is.neg) %>%
  isContaminant(method = "combined", neg = "is.neg", conc = "DNA_bac_well") %>%
  subset(., .$contaminant)
Stratified analysis showed no contaminants in NS and BAL
Sputum may have Corynebacterium pseudodiphtheriticum and Candida albicans as contaminants.
#Stratified by treatment
# Runs the prevalence, frequency and combined decontam methods separately on the
# samples of each depletion treatment and prints the flagged contaminants.
#
# The previous calls used `subset_samples(x, treatment = "lypma")`: with `=` the
# text is a named argument, not a condition, so no samples were filtered and
# every "stratified" run used the full data set. The headings were also
# copy-paste duplicates (one method repeated three times per treatment).
#
# Treatment names are spelled differently across this file ("lypma"/"lyPMA",
# "host_zero"/"Host zero"), so levels are compared on a normalised key
# (lower case, alphanumerics only).
# NOTE(review): assumes each treatment stratum contains its own negative
# controls (is.neg == TRUE); the prevalence and combined methods need them -
# confirm against the sample sheet.
treatment_labels <- c(lypma = "lyPMA",
                      benzonase = "Benzonase",
                      molysis = "Molysis",
                      hostzero = "Host zero",
                      qiaamp = "QIAamp")
treatment_keys <- tolower(gsub("[^[:alnum:]]", "",
                               as.character(sample_data(phyloseq_rel_nz)$treatment)))
for (treatment_key in names(treatment_labels)) {
  # %in% (not ==) so samples with a missing treatment are simply excluded
  phyloseq_treatment <- prune_samples(treatment_keys %in% treatment_key,
                                      phyloseq_rel_nz)
  cat("decontam prevalence - ", treatment_labels[[treatment_key]], "\n", sep = "")
  print(isContaminant(phyloseq_treatment, method = "prevalence",
                      neg = "is.neg", threshold = 0.5) %>%
          subset(., .$contaminant))
  cat("decontam frequency - ", treatment_labels[[treatment_key]], "\n", sep = "")
  print(isContaminant(phyloseq_treatment, method = "frequency",
                      conc = "DNA_bac_well") %>%
          subset(., .$contaminant))
  cat("decontam combined - ", treatment_labels[[treatment_key]], "\n", sep = "")
  print(isContaminant(phyloseq_treatment, method = "combined",
                      neg = "is.neg", conc = "DNA_bac_well") %>%
          subset(., .$contaminant))
}
A6 Results:
6.1. Listeria floridensis could be a potential contaminant
6.2. Else, BAL and NS are free from contaminants, and sputum may have Corynebacterium pseudodiphtheriticum and Candida albicans as contaminants.
Further analysis is required after adding data of controls.
A7. LM of function alpha diversity
# Sample metadata of the pathway (RPKM) phyloseq object as a plain data frame,
# without the rows whose `simpson` value is NaN.
# NOTE: this variable shares its name with phyloseq's sample_data() function;
# calls such as sample_data(x) still resolve to the function.
sample_data <- phyloseq$phyloseq_path_rpkm %>%
  sample_data() %>%
  data.frame(check.names = FALSE) %>%
  subset(., !is.nan(.$simpson))
# Re-point phyloseq_rel_nz at the pathway data: samples with S.obs != 0 from
# the listed sample types.
phyloseq_rel_nz <- subset_samples(
  phyloseq$phyloseq_path_rpkm,
  S.obs != 0 & sample_type %in% c("BAL", "Nasal", "Sputum", "Neg.", "Mock")
)
# Derived covariates: log10 of the final read count and a combined
# "sample_type:treatment" label.
sample_data(phyloseq_rel_nz)$log10.Final_reads <- log10(
  sample_data(phyloseq_rel_nz)$Final_reads
)
sample_data(phyloseq_rel_nz)$sampletype_treatment <- paste(
  sample_data(phyloseq_rel_nz)$sample_type,
  sample_data(phyloseq_rel_nz)$treatment,
  sep = ":"
)
Figure - Alpha diversity
Alpha diversity of the functional analysis results showed a similar pattern to the taxonomic results.
Similar approach was employed.
# Alpha-diversity box plots (panels A-D) for Sputum, Nasal and BAL samples of
# the pathway (RPKM) phyloseq object.
#
# The plotting table is filtered on its own `sample_type` column. The separate
# `sample_data` data frame has had NaN-Simpson rows removed, so a row mask built
# from it need not line up with this table.
alpha_plot_data <- sample_data(phyloseq$phyloseq_path_rpkm) %>%
  data.frame(check.names = FALSE) %>%
  subset(sample_type %in% c("Sputum", "Nasal", "BAL"))

# Builds one panel: box plot of column `y_col` filled by treatment and facetted
# by sample type, with y-axis title `y_label` and panel tag `panel_tag`.
alpha_boxplot <- function(y_col, y_label, panel_tag) {
  ggplot(alpha_plot_data, aes(y = .data[[y_col]])) +
    geom_boxplot(aes(fill = treatment), lwd = 0.2) +
    #color using https://colorbrewer2.org/#type=qualitative&scheme=Set1&n=6
    scale_fill_manual(values = c("#e31a1c", "#fb9a99", "#33a02c", "#b2df8a", "#1f78b4", "#a6cee3"),
                      name = "Treatment",
                      labels = c("Untreated", "lyPMA", "Benzonase", "Host zero", "Molysis", "QIAamp")) +
    ylab(y_label) +
    theme_classic(base_size = 12, base_family = "serif") +
    labs(tag = panel_tag) +
    # the x axis carries no information (one box per fill level), so hide it
    theme(plot.tag = element_text(size = 15),
          axis.text.x = element_blank(),
          axis.ticks.x = element_blank()) +
    facet_wrap(~sample_type) +
    guides(fill = guide_legend(nrow = 1))
}

f4a <- alpha_boxplot("S.obs", "Species richness", "A")
f4b <- alpha_boxplot("data_shannon", "Shannon", "B")
f4c <- alpha_boxplot("data_invsimpson", "Inverse simpson", "C")
f4d <- alpha_boxplot("dbp", "Berger-Parker index", "D")
ggarrange(f4a, f4b, f4c, f4d, common.legend = TRUE, align = "hv") # alpha diversity plots
Function richness
Alpha diversity could change due to treatment.
Both stratified and nonstratified analyses were conducted.
All samples:
S.obs ~ sample_type * treatment + log10 (Final_reads) + (1|original_sample)
Stratified:
S.obs ~ sample_type + log10 (Final_reads) + (1|original_sample)
Function richness (all samples & interaction term) - ANOVA
Interaction term showed high p values. However, it could be due to even effect sample type * treatment. Interaction term will be tested.
# Sample metadata of the pathway (RPKM) object as a plain data frame, keeping
# only the samples whose `simpson` value is not NaN.
sample_data <- phyloseq$phyloseq_path_rpkm %>%
  sample_data() %>%
  data.frame(check.names = FALSE) %>%
  subset(., !is.nan(.$simpson))

# Mixed model of function richness: sample type x treatment interaction,
# adjusted for log10 read depth, with a random intercept per subject.
lmer_sob <- lmer(
  S.obs ~ sample_type * treatment + log10(Final_reads) + (1 | subject_id),
  data = sample_data
)

# ANOVA table of the fixed effects; terms with p < 0.05 get a "*" flag and
# interaction terms are displayed as "a * b".
anova(lmer_sob) %>%
  data.frame(check.names = FALSE) %>%
  mutate(` ` = if_else(abs(`Pr(>F)`) < 0.05, "*", " ", missing = " ")) %>%
  rownames_to_column(var = "x") %>%
  mutate(x = gsub(":", " * ", x)) %>%
  column_to_rownames(var = "x") %>%
  kbl(format = "html") %>%
  kable_styling(full_width = FALSE, html_font = "serif")
| Sum Sq | Mean Sq | NumDF | DenDF | F value | Pr(>F) | ||
|---|---|---|---|---|---|---|---|
| sample_type | 37549.41 | 9387.353 | 4 | 14.04851 | 8.565282 | 0.0010209 |
|
| treatment | 32851.08 | 6570.217 | 5 | 103.61403 | 5.994849 | 0.0000653 |
|
| log10(Final_reads) | 90968.23 | 90968.229 | 1 | 113.90994 | 83.001948 | 0.0000000 |
|
| sample_type * treatment | 91552.36 | 4577.618 | 20 | 100.80643 | 4.176746 | 0.0000008 |
|
Function richness (all samples & interaction term)
The effect of some treatments was neutralized by the interaction term. Therefore, the association was sample-type specific.
Stratified analysis will be conducted.
# Coefficient table of the richness model with the sample type x treatment
# interaction (all samples). Rows with p < 0.05 are flagged with "*"; the
# "treatment"/"sample_type" prefixes are stripped from the term names and
# interactions are displayed as "a * b".
lmer(
  S.obs ~ sample_type * treatment + log10(Final_reads) + (1 | subject_id),
  data = sample_data
) %>%
  summary() %>%
  coef() %>%
  data.frame(check.names = FALSE) %>%
  mutate(` ` = if_else(abs(`Pr(>|t|)`) < 0.05, "*", " ", missing = " ")) %>%
  rownames_to_column(var = "x") %>%
  mutate(x = gsub(":", " * ", gsub("treatment|sample_type", "", x))) %>%
  column_to_rownames(var = "x") %>%
  kbl(format = "html") %>%
  kable_styling(full_width = FALSE, html_font = "serif")
| Estimate | Std. Error | df | t value | Pr(>|t|) | ||
|---|---|---|---|---|---|---|
| (Intercept) | -383.158282 | 59.984929 | 74.83486 | -6.3875758 | 0.0000000 |
|
| Mock | 203.918522 | 52.531891 | 20.31266 | 3.8818044 | 0.0009058 |
|
| BAL | -1.095471 | 43.958000 | 27.47243 | -0.0249209 | 0.9802982 | |
| Nasal | 43.349247 | 39.476273 | 21.79227 | 1.0981089 | 0.2841421 | |
| Sputum | 124.694518 | 41.649510 | 22.75598 | 2.9939012 | 0.0065308 |
|
| lyPMA | 51.382920 | 31.479656 | 100.44998 | 1.6322580 | 0.1057562 | |
| Benzonase | -35.158089 | 24.193720 | 98.95731 | -1.4531907 | 0.1493347 | |
| Host zero | -43.729450 | 24.294952 | 99.12001 | -1.7999398 | 0.0749123 | |
| Molysis | -46.214379 | 24.223462 | 99.00536 | -1.9078355 | 0.0593101 | |
| QIAamp | -18.489418 | 25.344982 | 99.02304 | -0.7295100 | 0.4674118 | |
| log10(Final_reads) | 74.222366 | 8.146868 | 113.90994 | 9.1105405 | 0.0000000 |
|
| Mock * lyPMA | -34.894063 | 39.269456 | 101.75242 | -0.8885802 | 0.3763245 | |
| BAL * lyPMA | -8.566753 | 39.776838 | 99.25496 | -0.2153704 | 0.8299205 | |
| Nasal * lyPMA | 6.268734 | 38.148085 | 103.97327 | 0.1643263 | 0.8697933 | |
| Sputum * lyPMA | -10.468558 | 37.029354 | 99.20803 | -0.2827097 | 0.7779884 | |
| Mock * Benzonase | -45.396371 | 31.423393 | 98.95087 | -1.4446680 | 0.1517104 | |
| BAL * Benzonase | 105.298371 | 35.446466 | 102.00842 | 2.9706310 | 0.0037060 |
|
| Nasal * Benzonase | 38.051696 | 30.881512 | 103.41089 | 1.2321837 | 0.2206748 | |
| Sputum * Benzonase | 57.175589 | 32.538793 | 99.59139 | 1.7571515 | 0.0819651 | |
| Mock * Host zero | -53.522125 | 31.480358 | 99.02168 | -1.7001752 | 0.0922369 | |
| BAL * Host zero | 143.331632 | 35.369394 | 101.89139 | 4.0524198 | 0.0000992 |
|
| Nasal * Host zero | 36.754407 | 31.232445 | 103.97139 | 1.1768021 | 0.2419616 | |
| Sputum * Host zero | 57.130886 | 33.900582 | 101.05164 | 1.6852480 | 0.0950266 | |
| Mock * Molysis | 33.565937 | 31.450375 | 98.98447 | 1.0672667 | 0.2884475 | |
| BAL * Molysis | 163.462129 | 35.740427 | 102.42825 | 4.5735920 | 0.0000135 |
|
| Nasal * Molysis | 75.501680 | 30.833226 | 102.90432 | 2.4487117 | 0.0160273 |
|
| Sputum * Molysis | 41.704107 | 35.204240 | 102.26355 | 1.1846331 | 0.2389084 | |
| Mock * QIAamp | -27.329172 | 32.280320 | 98.94583 | -0.8466202 | 0.3992498 | |
| BAL * QIAamp | 73.424629 | 36.463347 | 102.24051 | 2.0136558 | 0.0466731 |
|
| Nasal * QIAamp | 15.581713 | 32.454958 | 103.38154 | 0.4801027 | 0.6321686 | |
| Sputum * QIAamp | 29.006234 | 34.256713 | 100.51284 | 0.8467314 | 0.3991564 |
All terms were significant.
Function richness - stratified (NS)
Some treatment enabled discovering more functions in Nasals
# Stratified richness model, Nasal samples only: treatment effect adjusted for
# log10 read depth. Rows with p < 0.05 are flagged with "*".
# NOTE(review): this model uses a random intercept per subject_id, whereas the
# BAL and Sputum richness models in this file use original_sample - confirm
# which grouping is intended.
lmer(
  S.obs ~ treatment + log10(Final_reads) + (1 | subject_id),
  data = subset(sample_data, sample_data$sample_type == "Nasal")
) %>%
  summary() %>%
  coef() %>%
  data.frame(check.names = FALSE) %>%
  mutate(` ` = if_else(abs(`Pr(>|t|)`) < 0.05, "*", " ", missing = " ")) %>%
  rownames_to_column(var = "x") %>%
  mutate(x = gsub(":", " * ", gsub("treatment|sample_type", "", x))) %>%
  column_to_rownames(var = "x") %>%
  kbl(format = "html") %>%
  kable_styling(full_width = FALSE, html_font = "serif")
| Estimate | Std. Error | df | t value | Pr(>|t|) | ||
|---|---|---|---|---|---|---|
| (Intercept) | -37.765323 | 54.755769 | 23.34813 | -0.6897049 | 0.4971783 | |
| lyPMA | 35.228578 | 11.218262 | 20.15999 | 3.1402885 | 0.0051161 |
|
| Benzonase | 7.066046 | 10.406810 | 19.90394 | 0.6789829 | 0.5049681 | |
| Host zero | 30.210307 | 12.622416 | 21.05053 | 2.3933854 | 0.0260893 |
|
| Molysis | 36.387452 | 10.412660 | 19.69726 | 3.4945397 | 0.0023246 |
|
| QIAamp | 48.253391 | 13.356859 | 20.43457 | 3.6126300 | 0.0016903 |
|
| log10(Final_reads) | 27.875498 | 8.269368 | 22.47665 | 3.3709343 | 0.0026979 |
|
Function richness (BAL)
Higher Final reads enables more discovery of functions.
# Stratified richness model, BAL samples only: treatment effect adjusted for
# log10 read depth, random intercept per original_sample. Rows with p < 0.05
# are flagged with "*".
lmer(
  S.obs ~ treatment + log10(Final_reads) + (1 | original_sample),
  data = subset(sample_data, sample_data$sample_type == "BAL")
) %>%
  summary() %>%
  coef() %>%
  data.frame(check.names = FALSE) %>%
  mutate(` ` = if_else(abs(`Pr(>|t|)`) < 0.05, "*", " ", missing = " ")) %>%
  rownames_to_column(var = "x") %>%
  mutate(x = gsub(":", " * ", gsub("treatment|sample_type", "", x))) %>%
  column_to_rownames(var = "x") %>%
  kbl(format = "html") %>%
  kable_styling(full_width = FALSE, html_font = "serif")
| Estimate | Std. Error | df | t value | Pr(>|t|) | ||
|---|---|---|---|---|---|---|
| (Intercept) | -619.07837 | 97.40440 | 18.90640 | -6.3557535 | 0.0000044 |
|
| lyPMA | 12.81140 | 33.30997 | 18.47738 | 0.3846115 | 0.7049162 | |
| Benzonase | 16.06655 | 35.09574 | 19.99559 | 0.4577921 | 0.6520375 | |
| Host zero | 39.14552 | 36.34582 | 19.96658 | 1.0770296 | 0.2943086 | |
| Molysis | 52.86775 | 37.17369 | 19.89387 | 1.4221819 | 0.1704638 | |
| QIAamp | -10.00821 | 37.29607 | 19.88056 | -0.2683449 | 0.7911986 | |
| log10(Final_reads) | 120.04861 | 17.60959 | 17.16210 | 6.8172310 | 0.0000028 |
|
Function richness (sputum)
Sputum showed no changes. This may be due to an enrichment of richness in the control groups.
# Stratified richness model, Sputum samples only: treatment effect adjusted
# for log10 read depth, random intercept per original_sample. Rows with
# p < 0.05 are flagged with "*".
lmer(
  S.obs ~ treatment + log10(Final_reads) + (1 | original_sample),
  data = subset(sample_data, sample_data$sample_type == "Sputum")
) %>%
  summary() %>%
  coef() %>%
  data.frame(check.names = FALSE) %>%
  mutate(` ` = if_else(abs(`Pr(>|t|)`) < 0.05, "*", " ", missing = " ")) %>%
  rownames_to_column(var = "x") %>%
  mutate(x = gsub(":", " * ", gsub("treatment|sample_type", "", x))) %>%
  column_to_rownames(var = "x") %>%
  kbl(format = "html") %>%
  kable_styling(full_width = FALSE, html_font = "serif")
| Estimate | Std. Error | df | t value | Pr(>|t|) | ||
|---|---|---|---|---|---|---|
| (Intercept) | -138.66543 | 192.34371 | 21.48406 | -0.7209252 | 0.4787277 | |
| lyPMA | 52.01863 | 27.85307 | 19.93178 | 1.8676082 | 0.0765926 | |
| Benzonase | 39.40910 | 35.11082 | 20.48877 | 1.1224207 | 0.2746653 | |
| Host zero | 47.80613 | 58.99496 | 21.05693 | 0.8103425 | 0.4268072 | |
| Molysis | 36.40748 | 68.78723 | 21.13504 | 0.5292767 | 0.6021291 | |
| QIAamp | 39.62629 | 51.21113 | 20.95872 | 0.7737826 | 0.4477001 | |
| log10(Final_reads) | 53.66176 | 32.83658 | 21.34597 | 1.6342066 | 0.1168788 |
Simpson function
Inverse Simpson of all samples:
Inverse Simpson ~ sample_type * treatment + log10 (Final_reads) + (1|original_sample)
Stratified:
Inverse Simpson ~ treatment + log10 (Final_reads) + (1|original_sample)
Inv Simp - ANOVA
p - value = 0.096 for the interaction term. Interaction term will be tested.
# Mixed model of the inverse Simpson index: sample type x treatment
# interaction, adjusted for log10 read depth, random intercept per subject.
lmer_invsimpson <- lmer(
  data_invsimpson ~ sample_type * treatment + log10(Final_reads) +
    (1 | subject_id),
  data = sample_data
)

# ANOVA table of the fixed effects; terms with p < 0.05 get a "*" flag and
# interaction terms are displayed as "a * b".
anova(lmer_invsimpson) %>%
  data.frame(check.names = FALSE) %>%
  mutate(` ` = if_else(abs(`Pr(>F)`) < 0.05, "*", " ", missing = " ")) %>%
  rownames_to_column(var = "x") %>%
  mutate(x = gsub(":", " * ", x)) %>%
  column_to_rownames(var = "x") %>%
  kbl(format = "html") %>%
  kable_styling(full_width = FALSE, html_font = "serif")
| Sum Sq | Mean Sq | NumDF | DenDF | F value | Pr(>F) | ||
|---|---|---|---|---|---|---|---|
| sample_type | 0.4555813 | 0.1138953 | 4 | 7.808402 | 3.1934129 | 0.0778088 | |
| treatment | 0.0924146 | 0.0184829 | 5 | 106.883870 | 0.5182264 | 0.7619900 | |
| log10(Final_reads) | 0.4769971 | 0.4769971 | 1 | 107.468738 | 13.3741106 | 0.0003968 |
|
| sample_type * treatment | 1.0819608 | 0.0540980 | 20 | 102.083083 | 1.5168082 | 0.0915111 |
Inv. Simpson (all samples & interaction term)
A sample-type-specific effect was observed. Stratified analysis is required.
# Inverse Simpson: coefficient table of the interaction model fitted above
# (`lmer_invsimpson`). Rows with p < 0.05 are flagged with "*"; the
# "treatment"/"sample_type" prefixes are stripped from the term names and
# interactions are displayed as "a * b".
summary(lmer_invsimpson) %>%
  coef() %>%
  data.frame(check.names = FALSE) %>%
  mutate(` ` = if_else(abs(`Pr(>|t|)`) < 0.05, "*", " ", missing = " ")) %>%
  rownames_to_column(var = "x") %>%
  mutate(x = gsub(":", " * ", gsub("treatment|sample_type", "", x))) %>%
  column_to_rownames(var = "x") %>%
  kbl(format = "html") %>%
  kable_styling(full_width = FALSE, html_font = "serif")
| Estimate | Std. Error | df | t value | Pr(>|t|) | ||
|---|---|---|---|---|---|---|
| (Intercept) | 2.4129598 | 0.2818527 | 83.63165 | 8.5610674 | 0.0000000 |
|
| Mock | 0.0275772 | 0.1903493 | 25.42229 | 0.1448768 | 0.8859506 | |
| BAL | 0.0714714 | 0.1756825 | 47.45648 | 0.4068212 | 0.6859684 | |
| Nasal | 0.3474791 | 0.1480822 | 33.68398 | 2.3465278 | 0.0249738 |
|
| Sputum | 0.4476724 | 0.1587808 | 37.79368 | 2.8194366 | 0.0076172 |
|
| lyPMA | -0.0167207 | 0.1784706 | 102.51973 | -0.0936890 | 0.9255390 | |
| Benzonase | -0.0476879 | 0.1380003 | 99.44655 | -0.3455637 | 0.7304006 | |
| Host zero | -0.0057907 | 0.1384873 | 99.78258 | -0.0418138 | 0.9667307 | |
| Molysis | -0.1336443 | 0.1381433 | 99.54580 | -0.9674322 | 0.3356726 | |
| QIAamp | 0.1066600 | 0.1445290 | 99.58232 | 0.7379837 | 0.4622599 | |
| log10(Final_reads) | -0.1560495 | 0.0426707 | 107.46874 | -3.6570631 | 0.0003968 |
|
| Mock * lyPMA | 0.1819456 | 0.2214136 | 105.15685 | 0.8217457 | 0.4130809 | |
| BAL * lyPMA | 0.3534296 | 0.2265502 | 100.38312 | 1.5600496 | 0.1218954 | |
| Nasal * lyPMA | -0.0800303 | 0.2135225 | 107.59502 | -0.3748095 | 0.7085399 | |
| Sputum * lyPMA | -0.0256880 | 0.2110019 | 99.96434 | -0.1217432 | 0.9033468 | |
| Mock * Benzonase | 0.1552707 | 0.1792427 | 99.43323 | 0.8662595 | 0.3884333 | |
| BAL * Benzonase | 0.0900448 | 0.1994768 | 106.43493 | 0.4514049 | 0.6526162 | |
| Nasal * Benzonase | 0.0536948 | 0.1736092 | 104.99243 | 0.3092855 | 0.7577173 | |
| Sputum * Benzonase | -0.1076592 | 0.1851264 | 100.75533 | -0.5815444 | 0.5621724 | |
| Mock * Host zero | 0.1347771 | 0.1795167 | 99.57950 | 0.7507775 | 0.4545570 | |
| BAL * Host zero | 0.2314543 | 0.1991447 | 106.20401 | 1.1622418 | 0.2477424 | |
| Nasal * Host zero | -0.0964932 | 0.1751823 | 105.94336 | -0.5508161 | 0.5829194 | |
| Sputum * Host zero | -0.2287391 | 0.1917129 | 103.74581 | -1.1931334 | 0.2355386 | |
| Mock * Molysis | 0.2483938 | 0.1793725 | 99.50263 | 1.3847930 | 0.1692130 | |
| BAL * Molysis | 0.4819250 | 0.2007603 | 107.25309 | 2.4004988 | 0.0180949 |
|
| Nasal * Molysis | -0.0779791 | 0.1735718 | 104.68812 | -0.4492611 | 0.6541721 | |
| Sputum * Molysis | -0.0596846 | 0.1980538 | 106.17034 | -0.3013557 | 0.7637327 | |
| Mock * QIAamp | 0.0428010 | 0.1841344 | 99.42283 | 0.2324443 | 0.8166708 | |
| BAL * QIAamp | -0.0561860 | 0.2049968 | 106.86142 | -0.2740823 | 0.7845505 | |
| Nasal * QIAamp | -0.2362964 | 0.1823044 | 105.71980 | -1.2961638 | 0.1977415 | |
| Sputum * QIAamp | -0.3755703 | 0.1941641 | 102.64831 | -1.9342930 | 0.0558304 |
Inverse Simpson - stratified (NS)
Inverse Simpson ~ sample_type + log10 (Final_reads) + (1|original_sample)
Nasal showed changes after Molysis treatment
# Stratified inverse Simpson model, Nasal samples only: treatment effect
# adjusted for log10 read depth. Rows with p < 0.05 are flagged with "*".
# NOTE(review): this model uses a random intercept per subject_id, whereas the
# BAL and Sputum inverse Simpson models in this file use original_sample -
# confirm which grouping is intended.
lmer(
  data_invsimpson ~ treatment + log10(Final_reads) + (1 | subject_id),
  data = subset(sample_data, sample_data$sample_type == "Nasal")
) %>%
  summary() %>%
  coef() %>%
  data.frame(check.names = FALSE) %>%
  mutate(` ` = if_else(abs(`Pr(>|t|)`) < 0.05, "*", " ", missing = " ")) %>%
  rownames_to_column(var = "x") %>%
  mutate(x = gsub(":", " * ", gsub("treatment|sample_type", "", x))) %>%
  column_to_rownames(var = "x") %>%
  kbl(format = "html") %>%
  kable_styling(full_width = FALSE, html_font = "serif")
| Estimate | Std. Error | df | t value | Pr(>|t|) | ||
|---|---|---|---|---|---|---|
| (Intercept) | 2.4811470 | 0.3874040 | 27.07580 | 6.4045475 | 0.0000007 |
|
| lyPMA | -0.0693508 | 0.0880237 | 23.97217 | -0.7878651 | 0.4384971 | |
| Benzonase | -0.0047597 | 0.0825888 | 22.71718 | -0.0576314 | 0.9545462 | |
| Host zero | -0.1435767 | 0.0962040 | 26.11397 | -1.4924199 | 0.1475728 | |
| Molysis | -0.2248548 | 0.0830164 | 22.52427 | -2.7085608 | 0.0126705 |
|
| QIAamp | -0.1700352 | 0.1031617 | 25.68792 | -1.6482392 | 0.1114825 | |
| log10(Final_reads) | -0.1131938 | 0.0589236 | 27.09916 | -1.9210251 | 0.0653065 |
Inverse Simpson - stratified (BAL)
No changes found at BAL
# Stratified inverse Simpson model, BAL samples only: treatment effect
# adjusted for log10 read depth, random intercept per original_sample. Rows
# with p < 0.05 are flagged with "*".
lmer(
  data_invsimpson ~ treatment + log10(Final_reads) + (1 | original_sample),
  data = subset(sample_data, sample_data$sample_type == "BAL")
) %>%
  summary() %>%
  coef() %>%
  data.frame(check.names = FALSE) %>%
  mutate(` ` = if_else(abs(`Pr(>|t|)`) < 0.05, "*", " ", missing = " ")) %>%
  rownames_to_column(var = "x") %>%
  mutate(x = gsub(":", " * ", gsub("treatment|sample_type", "", x))) %>%
  column_to_rownames(var = "x") %>%
  kbl(format = "html") %>%
  kable_styling(full_width = FALSE, html_font = "serif")
| Estimate | Std. Error | df | t value | Pr(>|t|) | ||
|---|---|---|---|---|---|---|
| (Intercept) | 2.7219275 | 0.7353785 | 18.44625 | 3.7013966 | 0.0015784 |
|
| lyPMA | 0.3561431 | 0.2540316 | 18.47266 | 1.4019640 | 0.1775062 | |
| Benzonase | 0.0775164 | 0.2662674 | 19.99882 | 0.2911223 | 0.7739557 | |
| Host zero | 0.2668472 | 0.2754566 | 19.90468 | 0.9687450 | 0.3442973 | |
| Molysis | 0.3931670 | 0.2815568 | 19.78278 | 1.3964040 | 0.1780653 | |
| QIAamp | 0.0958921 | 0.2824594 | 19.76198 | 0.3394897 | 0.7378222 | |
| log10(Final_reads) | -0.1992993 | 0.1325153 | 16.23589 | -1.5039723 | 0.1517915 |
Inverse Simpson - stratified (spt)
Changes associated with deeper sequencing with sputum
# Stratified inverse Simpson model, Sputum samples only: treatment effect
# adjusted for log10 read depth, random intercept per original_sample. Rows
# with p < 0.05 are flagged with "*".
lmer(
  data_invsimpson ~ treatment + log10(Final_reads) + (1 | original_sample),
  data = subset(sample_data, sample_data$sample_type == "Sputum")
) %>%
  summary() %>%
  coef() %>%
  data.frame(check.names = FALSE) %>%
  mutate(` ` = if_else(abs(`Pr(>|t|)`) < 0.05, "*", " ", missing = " ")) %>%
  rownames_to_column(var = "x") %>%
  mutate(x = gsub(":", " * ", gsub("treatment|sample_type", "", x))) %>%
  column_to_rownames(var = "x") %>%
  kbl(format = "html") %>%
  kable_styling(full_width = FALSE, html_font = "serif")
| Estimate | Std. Error | df | t value | Pr(>|t|) | ||
|---|---|---|---|---|---|---|
| (Intercept) | 4.2492828 | 0.4271773 | 20.76851 | 9.9473516 | 0.0000000 |
|
| lyPMA | 0.0863071 | 0.0612866 | 19.53742 | 1.4082531 | 0.1747707 | |
| Benzonase | 0.0462489 | 0.0774828 | 19.90864 | 0.5968921 | 0.5573071 | |
| Host zero | 0.1642745 | 0.1306066 | 20.29757 | 1.2577811 | 0.2227494 | |
| Molysis | 0.2809720 | 0.1523555 | 20.35230 | 1.8441861 | 0.0797637 | |
| QIAamp | 0.0685141 | 0.1133096 | 20.22927 | 0.6046627 | 0.5521257 | |
| log10(Final_reads) | -0.3943791 | 0.0728223 | 20.50219 | -5.4156359 | 0.0000245 |
|
A8. Function beta diversity
Permanova (Taxa dist ~ log10(final reads) + sample_type + treatment + sample_type * treatment + subject_id) –> both stratified and nonstratified
Beta diversity figure
PCoA based on Bray-Curtis dissimilarities
# PERMANOVA (vegan::adonis2) on Bray-Curtis dissimilarities of a phyloseq
# object, with permutations restricted within subject (strata = subject_id).
#   ps     : phyloseq object to analyse
#   rhs    : right-hand side of the model formula, as a string; the order of
#            the terms matters because they are tested sequentially
#   n_perm : number of permutations
# Returns the adonis2 result table with one row per model term.
run_bray_permanova <- function(ps, rhs, n_perm = 10000) {
  # Distance matrix and metadata are derived from the same object so that
  # their sample order always agrees.
  meta <- ps %>% sample_data() %>% data.frame(check.names = FALSE)
  bray_dist <- distance(ps, method = "bray")
  vegan::adonis2(
    # as.formula() captures this function's environment, where `bray_dist`
    # is defined, so adonis2 can resolve the response.
    as.formula(paste("bray_dist ~", rhs)),
    data = meta,
    strata = meta$subject_id,
    permutations = n_perm,
    # Request sequential per-term tests explicitly: the default of `by`
    # differs between vegan releases, and the result tables further down
    # expect one row per term rather than a single omnibus "Model" row.
    by = "terms"
  )
}

# Right-hand side with each depletion protocol entered as its own indicator.
rhs_by_protocol <-
  "lypma + benzonase + host_zero + molysis + qiaamp + log10(Final_reads)"

# All samples: treatment as a single categorical term.
bray_perm_uni_strata <- run_bray_permanova(
  phyloseq_rel_nz,
  "sample_type + log10(Final_reads) + treatment"
)
# All samples: one indicator per depletion protocol.
bray_perm_strata <- run_bray_permanova(
  phyloseq_rel_nz,
  "sample_type + log10(Final_reads) + lypma + benzonase + host_zero + molysis + qiaamp"
)
# All samples: sample type x treatment interaction.
bray_perm_inter <- run_bray_permanova(
  phyloseq_rel_nz,
  "sample_type * treatment + log10(Final_reads)"
)
# Stratified by sample type. subset_samples() is called at top level (not
# inside the helper) because it evaluates its condition non-standardly.
bray_perm_ns <- run_bray_permanova(
  subset_samples(phyloseq_rel_nz, sample_type == "Nasal"),
  rhs_by_protocol
)
bray_perm_bal <- run_bray_permanova(
  subset_samples(phyloseq_rel_nz, sample_type == "BAL"),
  rhs_by_protocol
)
bray_perm_spt <- run_bray_permanova(
  subset_samples(phyloseq_rel_nz, sample_type == "Sputum"),
  rhs_by_protocol
)
# PCoA of Bray-Curtis dissimilarities, coloured by treatment and shaped by
# sample type (panel E).
# NOTE(review): scale_shape() is given exactly three labels; confirm that
# phyloseq_rel_nz contains only BAL, Nasal and Sputum samples at this point.
ordinate(phyloseq_rel_nz, method = "PCoA", distance = "bray") %>%
  # `color` is spelled out instead of relying on partial matching of `col`.
  plot_ordination(phyloseq_rel_nz, ., color = "treatment", shape = "sample_type") +
  # color using https://colorbrewer2.org/#type=qualitative&scheme=Set1&n=6
  scale_color_manual(
    values = c("#e31a1c", "#fb9a99", "#33a02c", "#b2df8a", "#1f78b4", "#a6cee3"),
    name = "Treatment",
    labels = c("Untreated", "lyPMA", "Benzonase", "Host zero", "Molysis", "QIAamp")
  ) +
  scale_shape(name = "Sample type", labels = c("BAL", "Nasal", "Sputum")) +
  geom_point(size = 3) +
  theme_classic(base_size = 12, base_family = "serif") +
  theme(
    plot.tag = element_text(size = 15),
    legend.spacing = unit(0, "cm"),
    legend.key.height = unit(0.4, "cm")
  ) + # legend.position = c(0.9, 0.4)
  labs(tag = "E")
Beta diversity boxplot (Function)
Distances between samples within each subject. Mean distance between control <-> treatment for each subject
# Distances of beta diversity - boxplots (panel F).
# Long-format table of pairwise Bray-Curtis distances; the columns iso1, iso2
# and dist are used below.
bray_dist_long <- distance(phyloseq_rel_nz, method = "bray") %>%
  as.matrix() %>%
  melt_dist()

# Map sample names to a depletion-method key from the substring they contain.
# Names matching none of the patterns give NA.
classify_depletion_method <- function(sample_name) {
  case_when(
    grepl("control", sample_name) ~ "control",
    grepl("lyPMA", sample_name) ~ "lypma",
    grepl("benzonase", sample_name) ~ "benzonase",
    grepl("host", sample_name) ~ "host_zero",
    grepl("qia", sample_name) ~ "qiaamp",
    grepl("moly", sample_name) ~ "molysis",
    .default = NA_character_
  )
}

# Sample id = first two "_"-separated fields of the sample name.
# This could also be done by merging metadata into `bray_dist_long`.
# (`names` / `names2` are kept as global objects, as in the original code.)
names <- data.frame(str_split_fixed(bray_dist_long$iso1, "_", 3))
names2 <- data.frame(str_split_fixed(bray_dist_long$iso2, "_", 3))
bray_dist_long$sample_id_1 <- paste(names$X1, names$X2, sep = "_")
bray_dist_long$method_1 <- classify_depletion_method(bray_dist_long$iso1)
bray_dist_long$sample_id_2 <- paste(names2$X1, names2$X2, sep = "_")
bray_dist_long$method_2 <- classify_depletion_method(bray_dist_long$iso2)

# Keep pairs taken from the same sample id ...
path_bray_dist_long_within_sampleid <- subset(
  bray_dist_long,
  bray_dist_long$sample_id_1 == bray_dist_long$sample_id_2
)
# ... where one member of the pair is the untreated control.
path_bray_dist_long_within_sampleid_from_control <- subset(
  path_bray_dist_long_within_sampleid,
  path_bray_dist_long_within_sampleid$method_1 == "control" |
    path_bray_dist_long_within_sampleid$method_2 == "control"
)

# The treatment of a pair is the method of its non-control member.
# It is stored as a factor in protocol order: a plain character column would
# be ordered alphabetically by ggplot, so positional colours and labels
# (lyPMA, Benzonase, ...) would be attached to the wrong treatments.
treatment_levels <- c("lypma", "benzonase", "host_zero", "molysis", "qiaamp")
path_bray_dist_long_within_sampleid_from_control$treatment <- factor(
  ifelse(
    path_bray_dist_long_within_sampleid_from_control$method_1 == "control",
    path_bray_dist_long_within_sampleid_from_control$method_2,
    path_bray_dist_long_within_sampleid_from_control$method_1
  ),
  levels = treatment_levels
)

# Sample type is read from the sample-name code (NS / CFB / BAL).
path_bray_dist_long_within_sampleid_from_control$sample_type <- case_when(
  grepl("NS", path_bray_dist_long_within_sampleid_from_control$iso1) ~ "nasal_swab",
  grepl("CFB", path_bray_dist_long_within_sampleid_from_control$iso1) ~ "Sputum",
  grepl("BAL", path_bray_dist_long_within_sampleid_from_control$iso1) ~ "BAL",
  .default = NA_character_
)

# Facet titles shown for each sample_type value.
label <- c("BAL", "Nasal", "Sputum")
names(label) <- c("BAL", "nasal_swab", "Sputum")

ggplot(path_bray_dist_long_within_sampleid_from_control, aes(y = dist, fill = treatment)) +
  geom_boxplot() +
  # Colours and labels are matched to treatments by name, not by position.
  # color using https://colorbrewer2.org/#type=qualitative&scheme=Set1&n=6
  scale_fill_manual(
    values = c(
      lypma = "#fb9a99", benzonase = "#33a02c", host_zero = "#b2df8a",
      molysis = "#1f78b4", qiaamp = "#a6cee3"
    ),
    name = "Treatment",
    labels = c(
      lypma = "lyPMA", benzonase = "Benzonase", host_zero = "Host zero",
      molysis = "Molysis", qiaamp = "QIAamp"
    )
  ) +
  ylab("Sample pair distances") +
  theme_classic(base_size = 12, base_family = "serif") +
  labs(tag = "F") +
  theme(
    plot.tag = element_text(size = 15),
    axis.text.x = element_blank(),
    axis.ticks.x = element_blank()
  ) +
  facet_wrap(~sample_type, labeller = labeller(sample_type = label))
Function PERMANOVA test results
Treatment as categorized group
dist ~ sample_type + log10(Final_reads) + treatment, strata = subject
No significant changes were observed.
# PERMANOVA table (treatment as one categorical term): model terms are renamed
# for display via a lookup vector (unlisted terms become NA), values are
# rounded to 3 digits and rows with p < 0.05 are flagged with "*".
bray_perm_uni_strata %>%
  data.frame(check.names = FALSE) %>%
  rownames_to_column("row.names") %>%
  mutate(row.names = unname(c(
    sample_type = "Sample type",
    treatment = "Treatment",
    `log10(Final_reads)` = "log10(Final reads)",
    Residual = "Residual",
    Total = "Total"
  )[row.names])) %>%
  column_to_rownames("row.names") %>%
  round(3) %>%
  mutate(` ` = if_else(abs(`Pr(>F)`) < 0.05, "*", " ", missing = " ")) %>%
  kbl(format = "html", caption = "Subject id as strata term") %>%
  kable_styling(full_width = FALSE, html_font = "serif")
| Df | SumOfSqs | R2 | F | Pr(>F) | ||
|---|---|---|---|---|---|---|
| Sample type | 4 | 32.217 | 0.767 | 133.512 | 0.000 |
|
| log10(Final reads) | 1 | 1.138 | 0.027 | 18.870 | 0.000 |
|
| Treatment | 5 | 0.450 | 0.011 | 1.491 | 0.107 | |
| Residual | 136 | 8.204 | 0.195 | NA | NA | |
| Total | 146 | 42.010 | 1.000 | NA | NA |
Function PERMANOVA (detailed treatment)
dist ~ sample_type + log10(Final_reads) + lypma + benzonase + host_zero + molysis + qiaamp, strata = subject
No significant changes were observed.
# PERMANOVA table (one term per depletion protocol): model terms are renamed
# for display via a lookup vector (unlisted terms become NA), values are
# rounded to 3 digits and rows with p < 0.05 are flagged with "*".
bray_perm_strata %>%
  data.frame(check.names = FALSE) %>%
  rownames_to_column("row.names") %>%
  mutate(row.names = unname(c(
    sample_type = "Sample type",
    lypma = "lyPMA",
    benzonase = "Benzonase",
    host_zero = "Host zero",
    molysis = "Molysis",
    qiaamp = "QIAamp",
    `log10(Final_reads)` = "log10(Final reads)",
    Residual = "Residual",
    Total = "Total"
  )[row.names])) %>%
  column_to_rownames("row.names") %>%
  round(3) %>%
  mutate(` ` = if_else(abs(`Pr(>F)`) < 0.05, "*", " ", missing = " ")) %>%
  kbl(format = "html") %>%
  kable_styling(full_width = FALSE, html_font = "serif")
| Df | SumOfSqs | R2 | F | Pr(>F) | ||
|---|---|---|---|---|---|---|
| Sample type | 4 | 32.217 | 0.767 | 133.512 | 0.000 |
|
| log10(Final reads) | 1 | 1.138 | 0.027 | 18.870 | 0.000 |
|
| lyPMA | 1 | 0.048 | 0.001 | 0.791 | 0.528 | |
| Benzonase | 1 | 0.035 | 0.001 | 0.577 | 0.727 | |
| Host zero | 1 | 0.032 | 0.001 | 0.534 | 0.796 | |
| Molysis | 1 | 0.134 | 0.003 | 2.216 | 0.063 | |
| QIAamp | 1 | 0.201 | 0.005 | 3.337 | 0.024 |
|
| Residual | 136 | 8.204 | 0.195 | NA | NA | |
| Total | 146 | 42.010 | 1.000 | NA | NA |
QIAamp showed highest changes. But, it could be sample type specific.
Function interaction term
dist ~ sample_type * treatment + log10(Final_reads), strata = subject
Some changes were treatment induced?
We don’t have to run stratified analysis
# PERMANOVA table (sample type x treatment interaction): model terms are
# renamed for display via a lookup vector (unlisted terms become NA), values
# are rounded to 3 digits and rows with p < 0.05 are flagged with "*".
bray_perm_inter %>%
  data.frame(check.names = FALSE) %>%
  rownames_to_column("row.names") %>%
  mutate(row.names = unname(c(
    sample_type = "Sample type",
    treatment = "Treatment",
    subject_id = "Subject",
    `log10(Final_reads)` = "log10(Final reads)",
    `sample_type:treatment` = "Sample type * treatment",
    Residual = "Residual",
    Total = "Total"
  )[row.names])) %>%
  column_to_rownames("row.names") %>%
  round(3) %>%
  mutate(` ` = if_else(abs(`Pr(>F)`) < 0.05, "*", " ", missing = " ")) %>%
  kbl(format = "html") %>%
  kable_styling(full_width = FALSE, html_font = "serif")
| Df | SumOfSqs | R2 | F | Pr(>F) | ||
|---|---|---|---|---|---|---|
| Sample type | 4 | 32.217 | 0.767 | 160.103 | 0 |
|
| Treatment | 5 | 0.686 | 0.016 | 2.729 | 0 |
|
| log10(Final reads) | 1 | 0.902 | 0.021 | 17.925 | 0 |
|
| Sample type * treatment | 20 | 2.369 | 0.056 | 2.354 | 0 |
|
| Residual | 116 | 5.836 | 0.139 | NA | NA | |
| Total | 146 | 42.010 | 1.000 | NA | NA |
Stratified (NS)
Stratified analysis not matching with boxplot results
# PERMANOVA table for Nasal samples: model terms are renamed for display via a
# lookup vector (unlisted terms become NA), values are rounded to 3 digits and
# rows with p < 0.05 are flagged with "*".
bray_perm_ns %>%
  data.frame(check.names = FALSE) %>%
  rownames_to_column("row.names") %>%
  mutate(row.names = unname(c(
    lypma = "lyPMA",
    benzonase = "Benzonase",
    host_zero = "Host zero",
    molysis = "Molysis",
    qiaamp = "QIAamp",
    subject_id = "Subject id",
    `log10(Final_reads)` = "log10(Final reads)",
    Residual = "Residual",
    Total = "Total"
  )[row.names])) %>%
  column_to_rownames("row.names") %>%
  round(3) %>%
  mutate(` ` = if_else(abs(`Pr(>F)`) < 0.05, "*", " ", missing = " ")) %>%
  kbl(format = "html") %>%
  kable_styling(full_width = FALSE, html_font = "serif")
| Df | SumOfSqs | R2 | F | Pr(>F) | ||
|---|---|---|---|---|---|---|
| lyPMA | 1 | 0.011 | 0.042 | 2.193 | 0.056 | |
| Benzonase | 1 | 0.011 | 0.040 | 2.076 | 0.171 | |
| Host zero | 1 | 0.009 | 0.033 | 1.728 | 0.227 | |
| Molysis | 1 | 0.014 | 0.052 | 2.746 | 0.100 | |
| QIAamp | 1 | 0.054 | 0.197 | 10.324 | 0.000 |
|
| log10(Final reads) | 1 | 0.028 | 0.103 | 5.405 | 0.019 |
|
| Residual | 28 | 0.147 | 0.534 | NA | NA | |
| Total | 34 | 0.275 | 1.000 | NA | NA |
Stratified (BAL)
Stratified analysis not matching with boxplot results
# PERMANOVA table for BAL samples: model terms are renamed for display via a
# lookup vector (unlisted terms become NA), values are rounded to 3 digits and
# rows with p < 0.05 are flagged with "*".
bray_perm_bal %>%
  data.frame(check.names = FALSE) %>%
  rownames_to_column("row.names") %>%
  mutate(row.names = unname(c(
    lypma = "lyPMA",
    benzonase = "Benzonase",
    host_zero = "Host zero",
    molysis = "Molysis",
    qiaamp = "QIAamp",
    subject_id = "Subject id",
    `log10(Final_reads)` = "log10(Final reads)",
    Residual = "Residual",
    Total = "Total"
  )[row.names])) %>%
  column_to_rownames("row.names") %>%
  round(3) %>%
  mutate(` ` = if_else(abs(`Pr(>F)`) < 0.05, "*", " ", missing = " ")) %>%
  kbl(format = "html") %>%
  kable_styling(full_width = FALSE, html_font = "serif")
| Df | SumOfSqs | R2 | F | Pr(>F) | ||
|---|---|---|---|---|---|---|
| lyPMA | 1 | 0.040 | 0.020 | 0.800 | 0.346 | |
| Benzonase | 1 | 0.115 | 0.058 | 2.279 | 0.091 | |
| Host zero | 1 | 0.027 | 0.014 | 0.543 | 0.403 | |
| Molysis | 1 | 0.180 | 0.091 | 3.569 | 0.047 |
|
| QIAamp | 1 | 0.001 | 0.000 | 0.018 | 0.942 | |
| log10(Final reads) | 1 | 0.603 | 0.306 | 11.980 | 0.078 | |
| Residual | 20 | 1.007 | 0.510 | NA | NA | |
| Total | 26 | 1.973 | 1.000 | NA | NA |
Stratified (spt)
Stratified analysis not matching with boxplot results
# PERMANOVA table for Sputum samples: model terms are renamed for display via
# a lookup vector (unlisted terms become NA), values are rounded to 3 digits
# and rows with p < 0.05 are flagged with "*".
bray_perm_spt %>%
  data.frame(check.names = FALSE) %>%
  rownames_to_column("row.names") %>%
  mutate(row.names = unname(c(
    lypma = "lyPMA",
    benzonase = "Benzonase",
    host_zero = "Host zero",
    molysis = "Molysis",
    qiaamp = "QIAamp",
    subject_id = "Subject id",
    `log10(Final_reads)` = "log10(Final reads)",
    Residual = "Residual",
    Total = "Total"
  )[row.names])) %>%
  column_to_rownames("row.names") %>%
  round(3) %>%
  mutate(` ` = if_else(abs(`Pr(>F)`) < 0.05, "*", " ", missing = " ")) %>%
  kbl(format = "html") %>%
  kable_styling(full_width = FALSE, html_font = "serif")
| Df | SumOfSqs | R2 | F | Pr(>F) | ||
|---|---|---|---|---|---|---|
| lyPMA | 1 | 0.017 | 0.023 | 3.773 | 0.062 | |
| Benzonase | 1 | 0.002 | 0.003 | 0.423 | 0.576 | |
| Host zero | 1 | 0.064 | 0.089 | 14.506 | 0.004 |
|
| Molysis | 1 | 0.139 | 0.191 | 31.291 | 0.000 |
|
| QIAamp | 1 | 0.370 | 0.509 | 83.348 | 0.000 |
|
| log10(Final reads) | 1 | 0.032 | 0.045 | 7.303 | 0.010 |
|
| Residual | 23 | 0.102 | 0.141 | NA | NA | |
| Total | 29 | 0.726 | 1.000 | NA | NA |
Results:
A9. DA analysis for taxa, by sample type and treatment
Both stratified and nonstratified were conducted.
MaAsLin condition:
Transformation: log transform
Normalization: None - as functional hits were normalized as RPKM already.
https://forum.biobakery.org/t/maaslin-with-shortbred-results-and-panphlan/3102
Results
# DA analysis - MaAsLin
# Store log10(Final_reads) in the sample metadata under `log10.Final_reads`.
sample_data(phyloseq_rel_nz)$log10.Final_reads <- log10(sample_data(phyloseq_rel_nz)$Final_reads)

# MaAsLin was run for all samples without decontam.
# For features differentially abundant by host depletion method, look to see
# which ones overlap with potential contaminants.
# Model: y ~ log(final reads) + sample_type + treatment
# The result tables are read back from CSV files under data/.

# All samples, without and with the interaction term
f_maaslin_all <- read.csv(file.path("data", "f_maaslin_all.csv"))
f_maaslin_interaction <- read.csv(file.path("data", "f_maaslin_interaction.csv"))

# One table per sample type (bal, spt, ns, pos, neg)
f_fit_data_bal <- read.csv(file.path("data", "f_fit_data_bal.csv"))
f_fit_data_spt <- read.csv(file.path("data", "f_fit_data_spt.csv"))
f_fit_data_ns <- read.csv(file.path("data", "f_fit_data_ns.csv"))
f_fit_data_pos <- read.csv(file.path("data", "f_fit_data_pos.csv"))
f_fit_data_neg <- read.csv(file.path("data", "f_fit_data_neg.csv"))

# "_neg" variants of the bal / spt / ns tables
f_fit_data_bal_neg <- read.csv(file.path("data", "f_fit_data_bal_neg.csv"))
f_fit_data_spt_neg <- read.csv(file.path("data", "f_fit_data_spt_neg.csv"))
f_fit_data_ns_neg <- read.csv(file.path("data", "f_fit_data_ns_neg.csv"))
MaAslin without interaction - volcano plot
Again, most of DA functions were sample type specific
# Significance tables for the balloon plots (top 20 features).
#
# Args:
#   data: long-format MaAsLin results with at least the columns `feature`,
#     `metadata` and `qval`. `metadata` must contain the five treatment terms
#     lypma, benzonase, host_zero, molysis and qiaamp.
#
# Returns a list with three elements:
#   data:        up to 20 features with the smallest q-value; columns are
#                `feature` plus one q-value column per treatment.
#   data_italic: the same q-values with features as row names and display
#                column names (lyPMA, Benzonase, Host zero, Molysis, QIAamp).
#   data_sig:    same shape as data_italic; "*" where q < 0.1, NA otherwise.
make_sig_table <- function(data) {
  treatments <- c("lypma", "benzonase", "host_zero", "molysis", "qiaamp")

  # One row per feature, one q-value column per metadata term.
  sig_data <- spread(data[order(data$qval), c("feature", "metadata", "qval")], metadata, qval)

  # Restore readable feature names. The bracketed special case has to be
  # tested on the raw name: once every "." is a "-" it can no longer match.
  sig_data$feature <- ifelse(
    sig_data$feature == "X.Collinsella._massiliensis",
    "[Collinsella]_massiliensis",
    gsub("[.]", "-", sig_data$feature)
  )

  # Row-wise minimum q-value over the numeric columns only. Including the
  # character `feature` column (as apply() on the whole data frame does)
  # coerces everything to strings and yields a lexicographic minimum.
  # An NA in any term still gives NA, which order() places last.
  # NOTE(review): the minimum spans every metadata term present in `data`,
  # not only the five treatments - confirm that this is the intended ranking.
  qval_cols <- setdiff(names(sig_data), "feature")
  sig_data$min <- do.call(pmin, unname(as.list(sig_data[qval_cols])))

  # head() keeps at most 20 rows without padding short tables with NA rows.
  sig_data <- sig_data[order(sig_data$min), ] %>%
    select(all_of(c("feature", treatments))) %>%
    head(20)

  sig_data_italic <- sig_data %>%
    remove_rownames() %>%
    column_to_rownames(var = "feature") %>%
    rename(lyPMA = lypma, Benzonase = benzonase, `Host zero` = host_zero, Molysis = molysis, QIAamp = qiaamp)

  sig_data_sig <- ifelse(sig_data_italic < 0.1, "*", NA) %>% data.frame(check.names = FALSE)

  list(data = sig_data, data_italic = sig_data_italic, data_sig = sig_data_sig)
}
# Replace every long-format MaAsLin table by its list of q-value tables
# (make_sig_table() returns $data, $data_italic and $data_sig).
f_fit_data_neg <- make_sig_table(f_fit_data_neg)
f_fit_data_pos <- make_sig_table(f_fit_data_pos)
f_fit_data_bal <- make_sig_table(f_fit_data_bal)
f_fit_data_ns <- make_sig_table(f_fit_data_ns)
f_fit_data_spt <- make_sig_table(f_fit_data_spt)
f_fit_data_bal_neg <- make_sig_table(f_fit_data_bal_neg)
f_fit_data_ns_neg <- make_sig_table(f_fit_data_ns_neg)
f_fit_data_spt_neg <- make_sig_table(f_fit_data_spt_neg)

# Mean abundance of the selected features per treatment group.
#
# Args:
#   ps_sig: phyloseq object already restricted to one sample type and to the
#     features listed in `fit$data$feature`.
#   fit: the make_sig_table() result that belongs to `ps_sig`.
#
# Returns a data frame with a `feature` column followed by one column per
# treatment; rows follow row.names(fit$data_italic) and zeros are set to NA.
f_rel_by_treatment <- function(ps_sig, fit) {
  cbind(ps_sig %>% otu_table %>% t, ps_sig %>% sample_data) %>%
    group_by(treatment) %>%
    summarise_if(is.numeric, mean, na.rm = TRUE) %>%
    # Keeps `treatment` plus the next 20 columns.
    # NOTE(review): this assumes all 20 selected features are present in
    # `ps_sig`; with fewer, numeric metadata columns would slip in - confirm.
    .[, 1:21] %>%
    column_to_rownames(., "treatment") %>%
    t() %>%
    data.frame(check.names = FALSE) %>%
    .[row.names(fit$data_italic), ] %>%
    mutate_all(~na_if(., 0)) %>%
    rownames_to_column("feature")
}

# Negative controls
f_neg_sig <- subset_taxa(subset_samples(phyloseq_rel_nz, sample_type == "Neg."),
                         taxa_names(subset_samples(phyloseq_rel_nz, sample_type == "Neg.")) %in% f_fit_data_neg$data$feature)
f_fit_data_neg$rel <- f_rel_by_treatment(f_neg_sig, f_fit_data_neg)

# Mock communities
f_pos_sig <- subset_taxa(subset_samples(phyloseq_rel_nz, sample_type == "Mock"),
                         taxa_names(subset_samples(phyloseq_rel_nz, sample_type == "Mock")) %in% f_fit_data_pos$data$feature)
f_fit_data_pos$rel <- f_rel_by_treatment(f_pos_sig, f_fit_data_pos)

# Sputum. The earlier duplicate of this step assigned its result to
# `fit_data_spt$rel` (no `f_` prefix) and has been dropped.
f_spt_sig <- subset_taxa(subset_samples(phyloseq_rel_nz, sample_type == "Sputum"),
                         taxa_names(subset_samples(phyloseq_rel_nz, sample_type == "Sputum")) %in% f_fit_data_spt$data$feature)
f_fit_data_spt$rel <- f_rel_by_treatment(f_spt_sig, f_fit_data_spt)

f_spt_neg_sig <- subset_taxa(subset_samples(phyloseq_rel_nz, sample_type == "Sputum"),
                             taxa_names(subset_samples(phyloseq_rel_nz, sample_type == "Sputum")) %in% f_fit_data_spt_neg$data$feature)
f_fit_data_spt_neg$rel <- f_rel_by_treatment(f_spt_neg_sig, f_fit_data_spt_neg)

# Nasal
f_ns_sig <- subset_taxa(subset_samples(phyloseq_rel_nz, sample_type == "Nasal"),
                        taxa_names(subset_samples(phyloseq_rel_nz, sample_type == "Nasal")) %in% f_fit_data_ns$data$feature)
f_fit_data_ns$rel <- f_rel_by_treatment(f_ns_sig, f_fit_data_ns)
# Overwrite the feature labels with the row names of the significance table.
f_fit_data_ns$rel$feature <- row.names(f_fit_data_ns$data_sig)

# Uses the nasal "_neg" table for both the feature filter and the row order
# (this step previously read the sputum table, f_fit_data_spt_neg).
f_ns_neg_sig <- subset_taxa(subset_samples(phyloseq_rel_nz, sample_type == "Nasal"),
                            taxa_names(subset_samples(phyloseq_rel_nz, sample_type == "Nasal")) %in% f_fit_data_ns_neg$data$feature)
f_fit_data_ns_neg$rel <- f_rel_by_treatment(f_ns_neg_sig, f_fit_data_ns_neg)

# BAL
f_bal_sig <- subset_taxa(subset_samples(phyloseq_rel_nz, sample_type == "BAL"),
                         taxa_names(subset_samples(phyloseq_rel_nz, sample_type == "BAL")) %in% f_fit_data_bal$data$feature)
f_fit_data_bal$rel <- f_rel_by_treatment(f_bal_sig, f_fit_data_bal)

f_bal_neg_sig <- subset_taxa(subset_samples(phyloseq_rel_nz, sample_type == "BAL"),
                             taxa_names(subset_samples(phyloseq_rel_nz, sample_type == "BAL")) %in% f_fit_data_bal_neg$data$feature)
f_fit_data_bal_neg$rel <- f_rel_by_treatment(f_bal_neg_sig, f_fit_data_bal_neg)
# Volcano plot of the pooled MaAsLin results: coefficient on x, -log10(q) on
# y, points coloured by the metadata term of each association.
ggplot(f_maaslin_all, aes(x = coef, y = -log10(qval), colour = metadata)) +
  theme_classic(base_family = "serif") +
  labs(
    tag = "A",
    x = "MaAslin coefficient",
    y = "-log<sub>10</sub>(*q*-value)"
  ) +
  geom_point(size = 2) +
  # Reference lines at q = 0.1 (-log10 = 1) and at a coefficient of zero
  geom_hline(yintercept = 1, colour = "gray") +
  geom_vline(xintercept = 0, colour = "gray") +
  #geom_richtext(aes( 4, 8, label = "*q*-value = 0.1, fold-change = 0", vjust = -1, fontface = 1), col = "grey", size = 3, family = "serif") +
  theme(
    legend.position = "top",
    axis.title.y = ggtext::element_markdown()
  ) +
  # Colours from https://colorbrewer2.org/#type=qualitative&scheme=Set1&n=6
  scale_colour_manual(
    values = c("#4daf4a", "#984ea3", "#f781bf", "#377eb8", "#ff7f00", "#ffff33", "#a65628"),
    breaks = c("log10.Final_reads", "sample_type", "lypma", "benzonase", "host_zero", "molysis", "qiaamp"),
    labels = c("log10 (Final reads)", "Sample type", "lyPMA", "Benzonase", "Host zero", "Molysis", "QIAamp")
  ) +
  guides(colour = guide_legend(title = "Covariates", title.position = "top", nrow = 2))
Most of the DA functions were sample-type dependent.
MaAsLin table (function)
A large number of functions were differentially abundant.
# Number of significant (q < 0.1) associations per metadata term
f_maaslin_all %>%
  dplyr::filter(qval < 0.1) %>%
  dplyr::pull(metadata) %>%
  table()
## .
## benzonase host_zero log10.Final_reads lypma
## 90 100 368 83
## molysis qiaamp sample_type
## 16 36 591
Stratified analysis is required.
Balloon plot - Sputum
Similarly, few functions were newly discovered
# Balloon plot for the sputum results: balloon size is the per-treatment mean
# abundance (`$rel`), fill is the q-value (`$data_italic`) and a red asterisk
# marks q < 0.1 (`$data_sig`).
# NOTE(review): the y axis is titled "Species" although the features here look
# like pathways, and the tag "D" is repeated in every balloon plot - confirm.
list(
  f_fit_data_spt$rel %>%
    gather(treatment, value, Untreated:QIAamp, factor_key = TRUE),
  f_fit_data_spt$data_italic %>%
    rownames_to_column("feature") %>%
    gather(treatment, qval, lyPMA:QIAamp, factor_key = TRUE),
  f_fit_data_spt$data_sig %>%
    rownames_to_column("feature") %>%
    gather(treatment, sig, lyPMA:QIAamp, factor_key = TRUE)
) %>%
  # Full outer joins on (feature, treatment), folded left to right
  Reduce(function(lhs, rhs) merge(lhs, rhs, by = c("feature", "treatment"), all = TRUE), .) %>%
  ggballoonplot(size = "value", fill = "qval", y = "feature", x = "treatment") +
  theme_classic(base_family = "serif") +
  # Colour gradient for the q-values
  gradient_fill(c("#006d2c", "#edf8fb")) +
  labs(x = "Experimental group", y = "Species", tag = "D") +
  theme(
    panel.grid.major = element_line(colour = "grey"),
    legend.position = "top",
    axis.text.x = element_text(vjust = 0.5, hjust = 0.5),
    # Markdown-aware axis text, so feature names can be italicised
    axis.text.y = ggtext::element_markdown()
  ) +
  # Significance asterisks
  geom_text(
    aes(y = feature, x = treatment, label = sig, col = "red"),
    hjust = -2,
    vjust = 0.8,
    size = 5
  ) +
  guides(
    col = guide_legend(
      nrow = 1,
      override.aes = aes(label = "*", size = 10, color = "red"),
      title = "Significance",
      title.position = "top",
      order = 3
    ),
    fill = guide_colorbar(
      title = c(expression(paste(italic("q"), "-value", sep = ""))),
      title.position = "top",
      order = 2
    ),
    size = guide_legend(
      title = "Relative abundance",
      title.position = "top",
      order = 1,
      nrow = 2
    )
  ) +
  scale_x_discrete(
    labels = c(
      "control" = "Untreated",
      "lypma" = "lyPMA",
      "benzonase" = "Benzonase",
      "host_zero" = "Host-zero",
      "molysis" = "Molysis",
      "qiaamp" = "QIAamp"
    )
  ) +
  scale_color_manual(
    values = c("red"),
    labels = c(expression(paste(italic("q"), "-value < 0.1", sep = "")))
  )
Balloon plot - BAL
Similarly, few functions were newly discovered
# Balloon plot for the BAL results: balloon size is the per-treatment mean
# abundance (`$rel`), fill is the q-value (`$data_italic`) and a red asterisk
# marks q < 0.1 (`$data_sig`).
# NOTE(review): the y axis is titled "Species" although the features here look
# like pathways, and the tag "D" is repeated in every balloon plot - confirm.
list(
  f_fit_data_bal$rel %>%
    gather(treatment, value, Untreated:QIAamp, factor_key = TRUE),
  f_fit_data_bal$data_italic %>%
    rownames_to_column("feature") %>%
    gather(treatment, qval, lyPMA:QIAamp, factor_key = TRUE),
  f_fit_data_bal$data_sig %>%
    rownames_to_column("feature") %>%
    gather(treatment, sig, lyPMA:QIAamp, factor_key = TRUE)
) %>%
  # Full outer joins on (feature, treatment), folded left to right
  Reduce(function(lhs, rhs) merge(lhs, rhs, by = c("feature", "treatment"), all = TRUE), .) %>%
  ggballoonplot(size = "value", fill = "qval", y = "feature", x = "treatment") +
  theme_classic(base_family = "serif") +
  # Colour gradient for the q-values
  gradient_fill(c("#006d2c", "#edf8fb")) +
  labs(x = "Experimental group", y = "Species", tag = "D") +
  theme(
    panel.grid.major = element_line(colour = "grey"),
    legend.position = "top",
    axis.text.x = element_text(vjust = 0.5, hjust = 0.5),
    # Markdown-aware axis text, so feature names can be italicised
    axis.text.y = ggtext::element_markdown()
  ) +
  # Significance asterisks
  geom_text(
    aes(y = feature, x = treatment, label = sig, col = "red"),
    hjust = -2,
    vjust = 0.8,
    size = 5
  ) +
  guides(
    col = guide_legend(
      nrow = 1,
      override.aes = aes(label = "*", size = 10, color = "red"),
      title = "Significance",
      title.position = "top",
      order = 3
    ),
    fill = guide_colorbar(
      title = c(expression(paste(italic("q"), "-value", sep = ""))),
      title.position = "top",
      order = 2
    ),
    size = guide_legend(
      title = "Relative abundance",
      title.position = "top",
      order = 1,
      nrow = 2
    )
  ) +
  scale_x_discrete(
    labels = c(
      "control" = "Untreated",
      "lypma" = "lyPMA",
      "benzonase" = "Benzonase",
      "host_zero" = "Host-zero",
      "molysis" = "Molysis",
      "qiaamp" = "QIAamp"
    )
  ) +
  scale_color_manual(
    values = c("red"),
    labels = c(expression(paste(italic("q"), "-value < 0.1", sep = "")))
  )
Balloon plot - Nasal
Some functions were newly discovered.
# Balloon plot for the nasal results: balloon size is the per-treatment mean
# abundance (`$rel`), fill is the q-value (`$data_italic`) and a red asterisk
# marks q < 0.1 (`$data_sig`).
# NOTE(review): the y axis is titled "Species" although the features here look
# like pathways, and the tag "D" is repeated in every balloon plot - confirm.
list(
  f_fit_data_ns$rel %>%
    gather(treatment, value, Untreated:QIAamp, factor_key = TRUE),
  f_fit_data_ns$data_italic %>%
    rownames_to_column("feature") %>%
    gather(treatment, qval, lyPMA:QIAamp, factor_key = TRUE),
  f_fit_data_ns$data_sig %>%
    rownames_to_column("feature") %>%
    gather(treatment, sig, lyPMA:QIAamp, factor_key = TRUE)
) %>%
  # Full outer joins on (feature, treatment), folded left to right
  Reduce(function(lhs, rhs) merge(lhs, rhs, by = c("feature", "treatment"), all = TRUE), .) %>%
  ggballoonplot(size = "value", fill = "qval", y = "feature", x = "treatment") +
  theme_classic(base_family = "serif") +
  # Colour gradient for the q-values
  gradient_fill(c("#006d2c", "#edf8fb")) +
  labs(x = "Experimental group", y = "Species", tag = "D") +
  theme(
    panel.grid.major = element_line(colour = "grey"),
    legend.position = "top",
    axis.text.x = element_text(vjust = 0.5, hjust = 0.5),
    # Markdown-aware axis text, so feature names can be italicised
    axis.text.y = ggtext::element_markdown()
  ) +
  # Significance asterisks
  geom_text(
    aes(y = feature, x = treatment, label = sig, col = "red"),
    hjust = -2,
    vjust = 0.8,
    size = 5
  ) +
  guides(
    col = guide_legend(
      nrow = 1,
      override.aes = aes(label = "*", size = 10, color = "red"),
      title = "Significance",
      title.position = "top",
      order = 3
    ),
    fill = guide_colorbar(
      title = c(expression(paste(italic("q"), "-value", sep = ""))),
      title.position = "top",
      order = 2
    ),
    size = guide_legend(
      title = "Relative abundance",
      title.position = "top",
      order = 1,
      nrow = 2
    )
  ) +
  scale_x_discrete(
    labels = c(
      "control" = "Untreated",
      "lypma" = "lyPMA",
      "benzonase" = "Benzonase",
      "host_zero" = "Host-zero",
      "molysis" = "Molysis",
      "qiaamp" = "QIAamp"
    )
  ) +
  scale_color_manual(
    values = c("red"),
    labels = c(expression(paste(italic("q"), "-value < 0.1", sep = "")))
  )
Results: After adding control data, MaAsLin needs to be re-run. Adding controls (mock communities) for each treatment group will give more statistically valid results for the model y ~ log(final reads) + sample_type + treatment (random effect: subject_id).
MaAslin with interaction
# Volcano plot for the MaAsLin model with the interaction term: coefficient on
# x, -log10(q) on y, points coloured by metadata term.
ggplot(f_maaslin_interaction, aes(x = coef, y = -log10(qval), colour = metadata)) +
  theme_classic(base_family = "serif") +
  #labs(tag = "A") +
  labs(
    title = "MaAslin with interaction term",
    x = "MaAslin coefficient",
    y = "-log<sub>10</sub>(*q*-value)"
  ) +
  geom_point(size = 2) +
  # Reference lines at q = 0.1 (-log10 = 1) and at a coefficient of zero
  geom_hline(yintercept = 1, colour = "gray") +
  geom_vline(xintercept = 0, colour = "gray") +
  #geom_richtext(aes( 4, 8, label = "*q*-value = 0.1, fold-change = 0", vjust = -1, fontface = 1), col = "grey", size = 3, family = "serif") +
  theme(
    legend.position = "top",
    axis.title.y = ggtext::element_markdown()
  ) +
  scale_colour_manual(values = c("#e41a1c", "#377eb8", "#4daf4a", "#984ea3")) +
  guides(colour = guide_legend(title = "Fixed effects", title.position = "top", nrow = 1))
#Checking number of bugs differentially abundance with interaction term
cat("Number of differentially abundant bugs by each metadata")
## Number of differentially abundant bugs by each metadata
# Significant (q < 0.1) associations per metadata term for the functional
# interaction model. Reads `f_maaslin_interaction`, the table used by the plot
# above and the tables below (this line previously read `maaslin_interaction`).
f_maaslin_interaction %>% subset(., .$qval < 0.1) %>% .$metadata %>% table()
## .
## log10.Final_reads sample_type sampletype_treatment
## 38 146 560
## treatment
## 135
MaAsLin interaction analysis
Some taxa were increased by each treatment, but they are not contaminants if they are present in most of the treatments.
# Count, per feature, its significant (q < 0.1) `treatment` associations and
# show the counts as an HTML table, most frequent first.
f_maaslin_interaction %>%
  dplyr::filter(qval < 0.1 & metadata == "treatment") %>%
  dplyr::pull(feature) %>%
  table() %>%
  data.frame() %>%
  arrange(desc(Freq)) %>%
  rename(Feature = ".") %>%
  kbl(format = "html", caption = "Table of taxa differentially abundant by treatment") %>%
  kable_styling(full_width = 0, html_font = "serif")
| Feature | Freq |
|---|---|
| PWY.5676 | 4 |
| COA.PWY | 3 |
| PWY.4981 | 3 |
| PWY.7221 | 3 |
| PEPTIDOGLYCANSYN.PWY | 2 |
| PWY.2942 | 2 |
| PWY.3781 | 2 |
| PWY.3841 | 2 |
| PWY.4242 | 2 |
| PWY.5097 | 2 |
| PWY.5188 | 2 |
| PWY.5189 | 2 |
| PWY.5659 | 2 |
| PWY.5675 | 2 |
| PWY.5686 | 2 |
| PWY.5913 | 2 |
| PWY.6163 | 2 |
| PWY.6385 | 2 |
| PWY.6386 | 2 |
| PWY.7184 | 2 |
| PWY.7199 | 2 |
| PWY.7219 | 2 |
| PWY.7237 | 2 |
| PWY.724 | 2 |
| PWY0.1586 | 2 |
| PWY0.166 | 2 |
| PWY66.389 | 2 |
| THRESYN.PWY | 2 |
| UDPNAGSYN.PWY | 2 |
| X1CMET2.PWY | 2 |
| ALLANTOINDEG.PWY | 1 |
| ANAEROFRUCAT.PWY | 1 |
| ANAGLYCOLYSIS.PWY | 1 |
| ARG.POLYAMINE.SYN | 1 |
| ARGDEG.PWY | 1 |
| ARGININE.SYN4.PWY | 1 |
| ARGORNPROST.PWY | 1 |
| ARO.PWY | 1 |
| ASPASN.PWY | 1 |
| AST.PWY | 1 |
| BIOTIN.BIOSYNTHESIS.PWY | 1 |
| BRANCHED.CHAIN.AA.SYN.PWY | 1 |
| CALVIN.PWY | 1 |
| CITRULBIO.PWY | 1 |
| COA.PWY.1 | 1 |
| COBALSYN.PWY | 1 |
| CODH.PWY | 1 |
| COLANSYN.PWY | 1 |
| COMPLETE.ARO.PWY | 1 |
| DENITRIFICATION.PWY | 1 |
| DTDPRHAMSYN.PWY | 1 |
| FAO.PWY | 1 |
| FASYN.INITIAL.PWY | 1 |
| FERMENTATION.PWY | 1 |
| FOLSYN.PWY | 1 |
| FUC.RHAMCAT.PWY | 1 |
| GALACT.GLUCUROCAT.PWY | 1 |
| GALACTARDEG.PWY | 1 |
| GALACTUROCAT.PWY | 1 |
| GLUCARDEG.PWY | 1 |
| GLUCARGALACTSUPER.PWY | 1 |
| GLUCONEO.PWY | 1 |
| GLUCOSE1PMETAB.PWY | 1 |
| GLUCUROCAT.PWY | 1 |
| GLYCOCAT.PWY | 1 |
| GLYCOGENSYNTH.PWY | 1 |
| GLYCOLYSIS | 1 |
| GLYCOLYSIS.E.D | 1 |
| GLYCOLYSIS.TCA.GLYOX.BYPASS | 1 |
| GLYOXYLATE.BYPASS | 1 |
| GOLPDLCAT.PWY | 1 |
| HEXITOLDEGSUPER.PWY | 1 |
| ILEUSYN.PWY | 1 |
| KETOGLUCONMET.PWY | 1 |
| LACTOSECAT.PWY | 1 |
| LIPASYN.PWY | 1 |
| MANNOSYL.CHITO.DOLICHOL.BIOSYNTHESIS | 1 |
| METHGLYUT.PWY | 1 |
| NADSYN.PWY | 1 |
| NONOXIPENT.PWY | 1 |
| OANTIGEN.PWY | 1 |
| ORNARGDEG.PWY | 1 |
| ORNDEG.PWY | 1 |
| P105.PWY | 1 |
| P122.PWY | 1 |
| P124.PWY | 1 |
| P125.PWY | 1 |
| P164.PWY | 1 |
| P165.PWY | 1 |
| P185.PWY | 1 |
| P221.PWY | 1 |
| P4.PWY | 1 |
| P42.PWY | 1 |
| P441.PWY | 1 |
| P461.PWY | 1 |
| P562.PWY | 1 |
| PANTOSYN.PWY | 1 |
| PENTOSE.P.PWY | 1 |
| PHOSLIPSYN.PWY | 1 |
| POLYAMINSYN3.PWY | 1 |
| POLYAMSYN.PWY | 1 |
| POLYISOPRENSYN.PWY | 1 |
| PPGPPMET.PWY | 1 |
| PRPP.PWY | 1 |
| PWY.1861 | 1 |
| PWY.2201 | 1 |
| PWY.241 | 1 |
| PWY.2723 | 1 |
| PWY.2941 | 1 |
| PWY.3001 | 1 |
| PWY.3502 | 1 |
| PWY.3801 | 1 |
| PWY.4041 | 1 |
| PWY.4361 | 1 |
| PWY.4702 | 1 |
| PWY.4984 | 1 |
| PWY.5005 | 1 |
| PWY.5022 | 1 |
| PWY.5028 | 1 |
| PWY.5044 | 1 |
| PWY.5067 | 1 |
| PWY.5079 | 1 |
| PWY.5081 | 1 |
| PWY.5083 | 1 |
| PWY.5100 | 1 |
| PWY.5103 | 1 |
| PWY.5104 | 1 |
| PWY.5129 | 1 |
| PWY.5136 | 1 |
| PWY.5138 | 1 |
| PWY.5154 | 1 |
| PWY.5177 | 1 |
| PWY.5265 | 1 |
| PWY.5304 | 1 |
| PWY.5306 | 1 |
| PWY.5345 | 1 |
| PWY.5347 | 1 |
| PWY.5367 | 1 |
| PWY.5381 | 1 |
| PWY.5384 | 1 |
| PWY.5464 | 1 |
| PWY.5505 | 1 |
| PWY.5514 | 1 |
| PWY.561 | 1 |
| PWY.5651 | 1 |
| PWY.5656 | 1 |
| PWY.5667 | 1 |
| PWY.5690 | 1 |
| PWY.5692 | 1 |
| PWY.5705 | 1 |
| PWY.5723 | 1 |
| PWY.5747 | 1 |
| PWY.5791 | 1 |
| PWY.5837 | 1 |
| PWY.5838 | 1 |
| PWY.5840 | 1 |
| PWY.5850 | 1 |
| PWY.5855 | 1 |
| PWY.5856 | 1 |
| PWY.5857 | 1 |
| PWY.5860 | 1 |
| PWY.5861 | 1 |
| PWY.5863 | 1 |
| PWY.5870 | 1 |
| PWY.5871 | 1 |
| PWY.5872 | 1 |
| PWY.5873 | 1 |
| PWY.5897 | 1 |
| PWY.5898 | 1 |
| PWY.5899 | 1 |
| PWY.5910 | 1 |
| PWY.5918 | 1 |
| PWY.5920 | 1 |
| PWY.5941 | 1 |
| PWY.5971 | 1 |
| PWY.5973 | 1 |
| PWY.5989 | 1 |
| PWY.6075 | 1 |
| PWY.6113 | 1 |
| PWY.6125 | 1 |
| PWY.6147 | 1 |
| PWY.6168 | 1 |
| PWY.621 | 1 |
| PWY.6263 | 1 |
| PWY.6284 | 1 |
| PWY.6285 | 1 |
| PWY.6309 | 1 |
| PWY.6317 | 1 |
| PWY.6318 | 1 |
| PWY.6351 | 1 |
| PWY.6352 | 1 |
| PWY.6353 | 1 |
| PWY.6396 | 1 |
| PWY.6470 | 1 |
| PWY.6471 | 1 |
| PWY.6507 | 1 |
| PWY.6519 | 1 |
| PWY.6527 | 1 |
| PWY.6531 | 1 |
| PWY.6549 | 1 |
| PWY.6588 | 1 |
| PWY.6595 | 1 |
| PWY.6596 | 1 |
| PWY.6606 | 1 |
| PWY.6608 | 1 |
| PWY.6609 | 1 |
| PWY.6612 | 1 |
| PWY.6628 | 1 |
| PWY.6630 | 1 |
| PWY.6703 | 1 |
| PWY.6708 | 1 |
| PWY.6737 | 1 |
| PWY.6797 | 1 |
| PWY.6829 | 1 |
| PWY.6859 | 1 |
| PWY.6891 | 1 |
| PWY.6892 | 1 |
| PWY.6895 | 1 |
| PWY.6901 | 1 |
| PWY.6969 | 1 |
| PWY.6981 | 1 |
| PWY.6992 | 1 |
| PWY.7003 | 1 |
| PWY.7007 | 1 |
| PWY.7036 | 1 |
| PWY.7039 | 1 |
| PWY.7053 | 1 |
| PWY.7115 | 1 |
| PWY.7117 | 1 |
| PWY.7118 | 1 |
| PWY.7196 | 1 |
| PWY.7197 | 1 |
| PWY.7198 | 1 |
| PWY.7200 | 1 |
| PWY.7204 | 1 |
| PWY.7208 | 1 |
| PWY.7210 | 1 |
| PWY.7211 | 1 |
| PWY.7228 | 1 |
| PWY.7242 | 1 |
| PWY.7245 | 1 |
| PWY.7254 | 1 |
| PWY.7268 | 1 |
| PWY.7269 | 1 |
| PWY.7279 | 1 |
| PWY.7282 | 1 |
| PWY.7283 | 1 |
| PWY.7286 | 1 |
| PWY.7288 | 1 |
| PWY.7323 | 1 |
| PWY.7328 | 1 |
| PWY.7337 | 1 |
| PWY.7338 | 1 |
| PWY.7345 | 1 |
| PWY.7385 | 1 |
| PWY.7388 | 1 |
| PWY.7391 | 1 |
| PWY.7409 | 1 |
| PWY.7411 | 1 |
| PWY.7527 | 1 |
| PWY.7528 | 1 |
| PWY.7539 | 1 |
| PWY.7546 | 1 |
| PWY.7592 | 1 |
| PWY.7606 | 1 |
| PWY.7626 | 1 |
| PWY.821 | 1 |
| PWY.922 | 1 |
| PWY0.1061 | 1 |
| PWY0.1261 | 1 |
| PWY0.1297 | 1 |
| PWY0.1298 | 1 |
| PWY0.1319 | 1 |
| PWY0.1479 | 1 |
| PWY0.162 | 1 |
| PWY0.42 | 1 |
| PWY0.781 | 1 |
| PWY0.845 | 1 |
| PWY0.881 | 1 |
| PWY3O.19 | 1 |
| PWY3O.355 | 1 |
| PWY4FS.7 | 1 |
| PWY4FS.8 | 1 |
| PWY4LZ.257 | 1 |
| PWY66.201 | 1 |
| PWY66.367 | 1 |
| PWY66.388 | 1 |
| PWY66.391 | 1 |
| PWY66.398 | 1 |
| PWY66.400 | 1 |
| PWY66.409 | 1 |
| PWY66.422 | 1 |
| PWYG.321 | 1 |
| PYRIDNUCSAL.PWY | 1 |
| PYRIDNUCSYN.PWY | 1 |
| PYRIDOXSYN.PWY | 1 |
| REDCITCYC | 1 |
| RHAMCAT.PWY | 1 |
| RUMP.PWY | 1 |
| SALVADEHYPOX.PWY | 1 |
| SO4ASSIM.PWY | 1 |
| SPHINGOLIPID.SYN.PWY | 1 |
| SULFATE.CYS.PWY | 1 |
| TCA | 1 |
| TCA.GLYOX.BYPASS | 1 |
| TEICHOICACID.PWY | 1 |
| THISYN.PWY | 1 |
| THISYNARA.PWY | 1 |
| TRIGLSYN.PWY | 1 |
| TRNA.CHARGING.PWY | 1 |
| TRPSYN.PWY | 1 |
| UBISYN.PWY | 1 |
| URDEGR.PWY | 1 |
| URSIN.PWY | 1 |
| VALDEG.PWY | 1 |
| VALSYN.PWY | 1 |
cat("Most of taxa were found on most of treatments.")
## Most of taxa were found on most of treatments.
cat("Some taxa were treatment specific, only to one group")
## Some taxa were treatment specific, only to one group
# Features with exactly one significant (q < 0.1) `treatment` association,
# listed with the treatment level, coefficient and q-value of that hit.
f_maaslin_interaction %>%
  dplyr::filter(qval < 0.1, metadata == "treatment") %>%
  # Keep only the features that occur once among the significant hits
  { .[.$feature %in% names(which(table(.$feature) == 1)), ] } %>%
  select(c("feature", "metadata", "value", "coef", "qval")) %>%
  remove_rownames() %>%
  kbl(format = "html", caption = "Table of taxa specific to one treatment group") %>%
  kable_styling(full_width = 0, html_font = "serif")
| feature | metadata | value | coef | qval |
|---|---|---|---|---|
| PWY.6606 | treatment | lyPMA | 18.389501 | 0.0000814 |
| PWY.6595 | treatment | lyPMA | 17.782892 | 0.0000987 |
| GLUCONEO.PWY | treatment | lyPMA | 17.256004 | 0.0001390 |
| PWY4FS.7 | treatment | lyPMA | 17.063858 | 0.0001953 |
| PWY4FS.8 | treatment | lyPMA | 17.064245 | 0.0001953 |
| PPGPPMET.PWY | treatment | lyPMA | 19.165180 | 0.0002422 |
| GLYCOLYSIS.TCA.GLYOX.BYPASS | treatment | lyPMA | 16.371591 | 0.0002469 |
| HEXITOLDEGSUPER.PWY | treatment | lyPMA | 16.055861 | 0.0002803 |
| PWY.5345 | treatment | lyPMA | 15.277318 | 0.0003947 |
| ASPASN.PWY | treatment | lyPMA | 16.170863 | 0.0004366 |
| PWY.6168 | treatment | lyPMA | 14.122654 | 0.0004461 |
| PWY.6396 | treatment | lyPMA | 13.654288 | 0.0004865 |
| PWY.5920 | treatment | lyPMA | 15.911001 | 0.0004913 |
| PWY.5910 | treatment | lyPMA | 14.332183 | 0.0004925 |
| PWY.7385 | treatment | lyPMA | 15.377686 | 0.0006016 |
| PWY.5840 | treatment | lyPMA | 15.167528 | 0.0006230 |
| PWY.5791 | treatment | lyPMA | 15.600813 | 0.0006884 |
| PWY.5837 | treatment | lyPMA | 15.600813 | 0.0006884 |
| PWY.5897 | treatment | lyPMA | 15.167533 | 0.0006950 |
| PWY.5898 | treatment | lyPMA | 15.167533 | 0.0006950 |
| PWY.5899 | treatment | lyPMA | 15.167533 | 0.0006950 |
| PWY.6628 | treatment | lyPMA | 16.003259 | 0.0006950 |
| PWY.922 | treatment | lyPMA | 13.502315 | 0.0006969 |
| FOLSYN.PWY | treatment | lyPMA | 13.388785 | 0.0008161 |
| PWY.5028 | treatment | QIAamp | 15.169594 | 0.0008411 |
| PWY0.1298 | treatment | lyPMA | 16.106334 | 0.0008777 |
| PWY.6612 | treatment | lyPMA | 13.241923 | 0.0008917 |
| TCA.GLYOX.BYPASS | treatment | lyPMA | 16.597331 | 0.0009351 |
| PWY.7198 | treatment | lyPMA | 13.102111 | 0.0009455 |
| ANAEROFRUCAT.PWY | treatment | lyPMA | 14.827707 | 0.0009586 |
| P124.PWY | treatment | lyPMA | 14.911091 | 0.0009864 |
| PWY.5705 | treatment | lyPMA | 13.838064 | 0.0010004 |
| PWY.5850 | treatment | lyPMA | 13.249212 | 0.0010205 |
| PWY.7115 | treatment | lyPMA | 14.084357 | 0.0010205 |
| GOLPDLCAT.PWY | treatment | lyPMA | 15.043431 | 0.0010247 |
| PWY.561 | treatment | lyPMA | 15.066937 | 0.0010247 |
| PWY.5860 | treatment | lyPMA | 13.255502 | 0.0010367 |
| PWY.5505 | treatment | lyPMA | 13.540425 | 0.0010465 |
| RHAMCAT.PWY | treatment | lyPMA | 14.818486 | 0.0010661 |
| PWY.5863 | treatment | lyPMA | 14.426545 | 0.0011214 |
| PWY0.1297 | treatment | lyPMA | 15.277661 | 0.0011302 |
| TRNA.CHARGING.PWY | treatment | lyPMA | 13.222975 | 0.0011588 |
| PWY.7210 | treatment | lyPMA | 15.982394 | 0.0011791 |
| P42.PWY | treatment | lyPMA | 14.902692 | 0.0011823 |
| FAO.PWY | treatment | lyPMA | 16.967273 | 0.0012079 |
| PWY.5136 | treatment | lyPMA | 16.772412 | 0.0012519 |
| PWY.6507 | treatment | lyPMA | 13.449180 | 0.0012526 |
| PWY.6630 | treatment | lyPMA | 13.835166 | 0.0012555 |
| PWY4LZ.257 | treatment | lyPMA | 13.509248 | 0.0013358 |
| ALLANTOINDEG.PWY | treatment | lyPMA | 13.283245 | 0.0013481 |
| P105.PWY | treatment | lyPMA | 16.191968 | 0.0013801 |
| PWY.7003 | treatment | lyPMA | 12.875324 | 0.0013815 |
| POLYISOPRENSYN.PWY | treatment | lyPMA | 14.247257 | 0.0014108 |
| METHGLYUT.PWY | treatment | lyPMA | 13.714242 | 0.0014258 |
| P441.PWY | treatment | lyPMA | 13.354373 | 0.0014724 |
| PWY.6549 | treatment | lyPMA | 13.990292 | 0.0015392 |
| REDCITCYC | treatment | lyPMA | 14.452201 | 0.0015392 |
| P122.PWY | treatment | lyPMA | 13.259414 | 0.0015512 |
| PWY.7269 | treatment | lyPMA | 15.326825 | 0.0016898 |
| SULFATE.CYS.PWY | treatment | lyPMA | 14.118694 | 0.0017211 |
| PWY.7211 | treatment | lyPMA | 14.093166 | 0.0018108 |
| PRPP.PWY | treatment | lyPMA | 12.820969 | 0.0019102 |
| P185.PWY | treatment | lyPMA | 16.547635 | 0.0019301 |
| PWY.7391 | treatment | lyPMA | 13.353611 | 0.0019535 |
| PWY.5104 | treatment | lyPMA | 17.460045 | 0.0019643 |
| P165.PWY | treatment | lyPMA | 13.724387 | 0.0020243 |
| PWY.6471 | treatment | lyPMA | 16.564939 | 0.0020464 |
| PWY.5855 | treatment | lyPMA | 18.708666 | 0.0020593 |
| PWY.5856 | treatment | lyPMA | 18.708666 | 0.0020593 |
| PWY.5857 | treatment | lyPMA | 18.708666 | 0.0020593 |
| PWY.6708 | treatment | lyPMA | 18.708666 | 0.0020593 |
| PWY.7242 | treatment | lyPMA | 12.851781 | 0.0023932 |
| PWY.5100 | treatment | lyPMA | 15.788423 | 0.0023988 |
| PWY.5265 | treatment | lyPMA | 16.109718 | 0.0024169 |
| PWY.6901 | treatment | lyPMA | 13.828397 | 0.0025764 |
| PWY.5971 | treatment | lyPMA | 12.739935 | 0.0026263 |
| PWY.6075 | treatment | lyPMA | 1.885657 | 0.0026394 |
| PWY.6737 | treatment | lyPMA | 13.331319 | 0.0026728 |
| PWY0.881 | treatment | lyPMA | 13.003127 | 0.0027499 |
| PWY0.1479 | treatment | lyPMA | 17.776618 | 0.0028479 |
| PANTOSYN.PWY | treatment | lyPMA | 14.327664 | 0.0028724 |
| PWY.6284 | treatment | lyPMA | 12.557201 | 0.0030359 |
| PENTOSE.P.PWY | treatment | lyPMA | 13.029626 | 0.0030721 |
| FASYN.INITIAL.PWY | treatment | lyPMA | 16.247650 | 0.0031298 |
| GALACTUROCAT.PWY | treatment | lyPMA | 12.599580 | 0.0032876 |
| PWY.5690 | treatment | lyPMA | 18.029333 | 0.0032928 |
| DTDPRHAMSYN.PWY | treatment | lyPMA | 12.111295 | 0.0033428 |
| RUMP.PWY | treatment | lyPMA | 14.382113 | 0.0033428 |
| PWY.4361 | treatment | lyPMA | 11.474191 | 0.0033463 |
| PWY0.845 | treatment | lyPMA | 16.116011 | 0.0033463 |
| PWY.5723 | treatment | lyPMA | 13.326350 | 0.0034085 |
| PWY.6147 | treatment | QIAamp | 15.228135 | 0.0034161 |
| GLYCOLYSIS.E.D | treatment | lyPMA | 16.204866 | 0.0035880 |
| PWY.6588 | treatment | lyPMA | 12.009264 | 0.0036163 |
| PWY.5367 | treatment | lyPMA | 12.381616 | 0.0036949 |
| OANTIGEN.PWY | treatment | lyPMA | 15.464195 | 0.0037187 |
| PYRIDOXSYN.PWY | treatment | lyPMA | 16.119948 | 0.0038151 |
| PWY66.391 | treatment | lyPMA | 12.656688 | 0.0039580 |
| PWY.6969 | treatment | lyPMA | 17.425122 | 0.0041174 |
| PWY.7288 | treatment | lyPMA | 12.822882 | 0.0042189 |
| PWY0.781 | treatment | lyPMA | 15.608321 | 0.0042231 |
| PWY.5667 | treatment | lyPMA | 13.144588 | 0.0044231 |
| PWY0.1319 | treatment | lyPMA | 13.143321 | 0.0044419 |
| PWY.5973 | treatment | lyPMA | 16.844201 | 0.0045946 |
| GLYOXYLATE.BYPASS | treatment | lyPMA | 16.748078 | 0.0046480 |
| P4.PWY | treatment | lyPMA | 15.351166 | 0.0046480 |
| GLYCOLYSIS | treatment | lyPMA | 14.304330 | 0.0046790 |
| ARG.POLYAMINE.SYN | treatment | lyPMA | 12.773547 | 0.0047002 |
| PWY0.162 | treatment | lyPMA | 13.058269 | 0.0047976 |
| LACTOSECAT.PWY | treatment | lyPMA | 15.650513 | 0.0050178 |
| PWY3O.355 | treatment | lyPMA | 13.007577 | 0.0052165 |
| POLYAMSYN.PWY | treatment | lyPMA | 12.714902 | 0.0054444 |
| URSIN.PWY | treatment | lyPMA | 12.896544 | 0.0058773 |
| PYRIDNUCSAL.PWY | treatment | lyPMA | 11.887690 | 0.0062175 |
| ANAGLYCOLYSIS.PWY | treatment | lyPMA | 12.627492 | 0.0062815 |
| PWY.1861 | treatment | lyPMA | 15.272698 | 0.0063969 |
| PWY66.400 | treatment | lyPMA | 13.005820 | 0.0064033 |
| TEICHOICACID.PWY | treatment | lyPMA | 15.432104 | 0.0064033 |
| PWY.5005 | treatment | lyPMA | 12.014463 | 0.0065655 |
| PYRIDNUCSYN.PWY | treatment | lyPMA | 15.031384 | 0.0066700 |
| THISYN.PWY | treatment | lyPMA | 12.063966 | 0.0071132 |
| P461.PWY | treatment | lyPMA | 12.966218 | 0.0071716 |
| PWYG.321 | treatment | lyPMA | 15.974095 | 0.0071819 |
| PWY.6895 | treatment | lyPMA | 11.934830 | 0.0071903 |
| PWY.6859 | treatment | lyPMA | 13.636234 | 0.0074640 |
| PWY.7279 | treatment | Host zero | 14.583841 | 0.0074694 |
| PWY.5138 | treatment | lyPMA | 12.756972 | 0.0076010 |
| PWY0.1261 | treatment | lyPMA | 15.345137 | 0.0077425 |
| PWY.6608 | treatment | lyPMA | 17.870291 | 0.0077493 |
| ARGININE.SYN4.PWY | treatment | lyPMA | 14.469480 | 0.0078212 |
| PWY.4702 | treatment | lyPMA | 11.932389 | 0.0080317 |
| PHOSLIPSYN.PWY | treatment | lyPMA | 11.974673 | 0.0080452 |
| PWY.4984 | treatment | lyPMA | 16.543725 | 0.0080696 |
| PWY.7254 | treatment | lyPMA | 16.469770 | 0.0082304 |
| PWY.821 | treatment | lyPMA | 12.926731 | 0.0084266 |
| P221.PWY | treatment | lyPMA | 14.943366 | 0.0084597 |
| GLYCOGENSYNTH.PWY | treatment | lyPMA | 12.710306 | 0.0085771 |
| TRPSYN.PWY | treatment | lyPMA | 12.996795 | 0.0088696 |
| PWY.6353 | treatment | lyPMA | 14.095459 | 0.0090426 |
| PWY.7323 | treatment | lyPMA | 18.015799 | 0.0091049 |
| PWY.7388 | treatment | lyPMA | 15.073360 | 0.0094306 |
| PWY0.1061 | treatment | lyPMA | 12.552240 | 0.0101117 |
| PWY.6892 | treatment | lyPMA | 12.195731 | 0.0101317 |
| PWY.5747 | treatment | lyPMA | 16.562231 | 0.0102062 |
| PWY.6797 | treatment | lyPMA | 12.373885 | 0.0102583 |
| PWY66.422 | treatment | lyPMA | 14.548810 | 0.0104973 |
| PWY.6703 | treatment | lyPMA | 17.222837 | 0.0107580 |
| PWY.6891 | treatment | lyPMA | 12.081943 | 0.0108591 |
| PWY.6470 | treatment | lyPMA | 13.728480 | 0.0109622 |
| ARGORNPROST.PWY | treatment | lyPMA | 15.459228 | 0.0109991 |
| CITRULBIO.PWY | treatment | lyPMA | 15.523020 | 0.0110532 |
| PWY.7200 | treatment | lyPMA | 15.657602 | 0.0112937 |
| COLANSYN.PWY | treatment | lyPMA | 16.702069 | 0.0117643 |
| PWY.2723 | treatment | lyPMA | 12.482434 | 0.0117817 |
| P562.PWY | treatment | lyPMA | 13.603124 | 0.0120290 |
| PWY.5306 | treatment | lyPMA | 12.589679 | 0.0121561 |
| P125.PWY | treatment | lyPMA | 14.801576 | 0.0125561 |
| GLUCARDEG.PWY | treatment | lyPMA | 12.960754 | 0.0129790 |
| VALSYN.PWY | treatment | Host zero | 13.069205 | 0.0130309 |
| PWY.5083 | treatment | lyPMA | 16.449227 | 0.0130437 |
| GLYCOCAT.PWY | treatment | lyPMA | 12.149412 | 0.0138727 |
| PWY.7245 | treatment | lyPMA | 13.365305 | 0.0140118 |
| PWY.6317 | treatment | lyPMA | 12.617950 | 0.0141544 |
| KETOGLUCONMET.PWY | treatment | lyPMA | 12.555228 | 0.0146718 |
| COMPLETE.ARO.PWY | treatment | lyPMA | 12.092341 | 0.0160954 |
| ARO.PWY | treatment | lyPMA | 12.068662 | 0.0163385 |
| PWY.3001 | treatment | lyPMA | 12.884738 | 0.0163844 |
| PWY.6519 | treatment | lyPMA | 15.691302 | 0.0165234 |
| PWY.5692 | treatment | lyPMA | 13.078809 | 0.0180252 |
| URDEGR.PWY | treatment | lyPMA | 13.078809 | 0.0180252 |
| PWY.6318 | treatment | lyPMA | 16.523215 | 0.0181690 |
| PWY66.367 | treatment | lyPMA | 13.089242 | 0.0186774 |
| VALDEG.PWY | treatment | lyPMA | 12.825092 | 0.0192811 |
| FERMENTATION.PWY | treatment | lyPMA | 15.708787 | 0.0193996 |
| BIOTIN.BIOSYNTHESIS.PWY | treatment | lyPMA | 13.895550 | 0.0200685 |
| PWY.2941 | treatment | lyPMA | 15.067424 | 0.0204743 |
| GLUCUROCAT.PWY | treatment | lyPMA | 12.591440 | 0.0218580 |
| TCA | treatment | lyPMA | 14.044790 | 0.0220336 |
| ARGDEG.PWY | treatment | lyPMA | 13.151693 | 0.0225968 |
| ORNARGDEG.PWY | treatment | lyPMA | 13.151693 | 0.0225968 |
| GALACT.GLUCUROCAT.PWY | treatment | lyPMA | 12.875511 | 0.0228788 |
| GALACTARDEG.PWY | treatment | lyPMA | 12.334075 | 0.0234447 |
| GLUCARGALACTSUPER.PWY | treatment | lyPMA | 12.334075 | 0.0234447 |
| PWY.5918 | treatment | lyPMA | 13.226297 | 0.0239728 |
| COA.PWY.1 | treatment | QIAamp | 11.665149 | 0.0244971 |
| SALVADEHYPOX.PWY | treatment | lyPMA | 13.776763 | 0.0252679 |
| PWY.6113 | treatment | lyPMA | 11.517436 | 0.0275131 |
| POLYAMINSYN3.PWY | treatment | lyPMA | 12.030321 | 0.0276100 |
| FUC.RHAMCAT.PWY | treatment | lyPMA | 12.152742 | 0.0276614 |
| THISYNARA.PWY | treatment | lyPMA | 13.676297 | 0.0301478 |
| PWY.6527 | treatment | lyPMA | 12.408333 | 0.0306687 |
| PWY66.398 | treatment | lyPMA | 12.940431 | 0.0307942 |
| CALVIN.PWY | treatment | lyPMA | 14.308542 | 0.0310434 |
| PWY.6992 | treatment | lyPMA | 11.226064 | 0.0312757 |
| NONOXIPENT.PWY | treatment | QIAamp | 11.941759 | 0.0318398 |
| PWY.5464 | treatment | lyPMA | 12.355635 | 0.0327029 |
| SO4ASSIM.PWY | treatment | lyPMA | 14.174681 | 0.0331532 |
| PWY.5514 | treatment | lyPMA | 10.173603 | 0.0335190 |
| PWY.241 | treatment | lyPMA | 15.649721 | 0.0338503 |
| PWY.5384 | treatment | lyPMA | 12.893289 | 0.0338503 |
| PWY.6609 | treatment | Benzonase | 12.401066 | 0.0347366 |
| PWY.7268 | treatment | lyPMA | 12.322190 | 0.0349421 |
| PWY.7626 | treatment | lyPMA | 10.602569 | 0.0350276 |
| PWY.6285 | treatment | lyPMA | 12.924402 | 0.0350335 |
| PWY66.201 | treatment | lyPMA | 10.670948 | 0.0368275 |
| PWY.5129 | treatment | lyPMA | 11.954992 | 0.0370265 |
| PWY.7283 | treatment | lyPMA | 11.940412 | 0.0374430 |
| PWY.7527 | treatment | lyPMA | 12.063949 | 0.0375495 |
| PWY.7528 | treatment | lyPMA | 10.434383 | 0.0377512 |
| PWY.7328 | treatment | lyPMA | 12.208034 | 0.0381543 |
| PWY.5154 | treatment | lyPMA | 16.618450 | 0.0394516 |
| PWY.6352 | treatment | lyPMA | 11.875163 | 0.0394516 |
| PWY.7039 | treatment | lyPMA | 11.597086 | 0.0395028 |
| PWY.6309 | treatment | lyPMA | 11.722748 | 0.0397332 |
| PWY.7592 | treatment | lyPMA | 11.935561 | 0.0402630 |
| MANNOSYL.CHITO.DOLICHOL.BIOSYNTHESIS | treatment | lyPMA | 11.651141 | 0.0404156 |
| TRIGLSYN.PWY | treatment | lyPMA | 11.438796 | 0.0405052 |
| PWY.7036 | treatment | lyPMA | 11.892469 | 0.0409554 |
| PWY.7053 | treatment | lyPMA | 11.738858 | 0.0411152 |
| SPHINGOLIPID.SYN.PWY | treatment | lyPMA | 11.743618 | 0.0413561 |
| PWY.5081 | treatment | lyPMA | 11.637937 | 0.0423602 |
| ORNDEG.PWY | treatment | lyPMA | 12.714203 | 0.0424394 |
| PWY.5022 | treatment | lyPMA | 16.766734 | 0.0425764 |
| PWY.7546 | treatment | lyPMA | 11.605601 | 0.0427501 |
| PWY.5304 | treatment | lyPMA | 11.802428 | 0.0436190 |
| PWY.7606 | treatment | lyPMA | 11.662213 | 0.0436820 |
| PWY.6829 | treatment | lyPMA | 12.643024 | 0.0440801 |
| PWY.7539 | treatment | lyPMA | 11.744263 | 0.0441427 |
| PWY.7208 | treatment | QIAamp | 12.356065 | 0.0462135 |
| PWY.7007 | treatment | lyPMA | 11.651153 | 0.0464872 |
| PWY.5838 | treatment | lyPMA | 11.831542 | 0.0464896 |
| PWY.5067 | treatment | lyPMA | 11.094221 | 0.0470595 |
| PWY.6125 | treatment | QIAamp | 12.023619 | 0.0470595 |
| PWY.5861 | treatment | lyPMA | 11.810720 | 0.0485953 |
| PWY.7196 | treatment | lyPMA | 13.299493 | 0.0494065 |
| PWY.5381 | treatment | lyPMA | 11.095322 | 0.0501402 |
| PWY.6531 | treatment | lyPMA | 12.370429 | 0.0506428 |
| PWY.7337 | treatment | lyPMA | 11.405467 | 0.0506428 |
| PWY.7338 | treatment | lyPMA | 11.405467 | 0.0506428 |
| GLUCOSE1PMETAB.PWY | treatment | lyPMA | 11.479911 | 0.0511422 |
| PWY.5079 | treatment | lyPMA | 10.751863 | 0.0521130 |
| PWY.4041 | treatment | lyPMA | 11.269646 | 0.0539760 |
| PWY.7197 | treatment | QIAamp | 12.059866 | 0.0567052 |
| PWY.7228 | treatment | QIAamp | 11.868023 | 0.0577075 |
| PWY66.409 | treatment | lyPMA | 10.571484 | 0.0578939 |
| PWY.3502 | treatment | lyPMA | 11.178828 | 0.0583357 |
| COBALSYN.PWY | treatment | lyPMA | 10.565551 | 0.0591426 |
| PWY.6351 | treatment | lyPMA | 11.115878 | 0.0620525 |
| PWY.7282 | treatment | lyPMA | 10.783924 | 0.0631644 |
| DENITRIFICATION.PWY | treatment | lyPMA | 13.775001 | 0.0635578 |
| PWY.5989 | treatment | lyPMA | 12.539258 | 0.0636017 |
| PWY.7118 | treatment | lyPMA | 11.449722 | 0.0643642 |
| PWY.6981 | treatment | lyPMA | 10.887056 | 0.0646027 |
| PWY0.42 | treatment | lyPMA | 12.134184 | 0.0650029 |
| PWY66.388 | treatment | lyPMA | 11.926335 | 0.0654707 |
| PWY.5656 | treatment | lyPMA | 11.262756 | 0.0656139 |
| AST.PWY | treatment | lyPMA | 12.925791 | 0.0658535 |
| PWY.6263 | treatment | lyPMA | 12.288427 | 0.0668910 |
| PWY.7411 | treatment | lyPMA | 11.079530 | 0.0674354 |
| PWY.2201 | treatment | lyPMA | 12.908367 | 0.0732521 |
| PWY.5347 | treatment | lyPMA | 12.379242 | 0.0734364 |
| PWY.5103 | treatment | lyPMA | 12.557227 | 0.0740257 |
| PWY.621 | treatment | lyPMA | 9.678576 | 0.0754965 |
| PWY.7409 | treatment | lyPMA | 10.501277 | 0.0762445 |
| BRANCHED.CHAIN.AA.SYN.PWY | treatment | lyPMA | 12.398479 | 0.0762918 |
| PWY.5177 | treatment | lyPMA | 13.511797 | 0.0771217 |
| UBISYN.PWY | treatment | lyPMA | 12.011745 | 0.0835220 |
| P164.PWY | treatment | lyPMA | 12.212886 | 0.0848672 |
| ILEUSYN.PWY | treatment | lyPMA | 11.886259 | 0.0850172 |
| PWY.5044 | treatment | lyPMA | 11.048660 | 0.0866958 |
| PWY.6596 | treatment | lyPMA | 11.052322 | 0.0866958 |
| PWY.5871 | treatment | lyPMA | 11.062386 | 0.0878288 |
| PWY.5873 | treatment | lyPMA | 11.062386 | 0.0878288 |
| CODH.PWY | treatment | lyPMA | 11.487059 | 0.0888305 |
| PWY.5870 | treatment | lyPMA | 10.936284 | 0.0894628 |
| PWY.5872 | treatment | lyPMA | 11.176384 | 0.0894628 |
| PWY.3801 | treatment | lyPMA | 11.671608 | 0.0895293 |
| PWY3O.19 | treatment | lyPMA | 10.957194 | 0.0895293 |
| PWY.7345 | treatment | lyPMA | 11.694075 | 0.0896610 |
| PWY.7286 | treatment | lyPMA | 11.545038 | 0.0909807 |
| PWY.7117 | treatment | lyPMA | 13.200467 | 0.0914084 |
| PWY.5651 | treatment | lyPMA | 10.848148 | 0.0934163 |
| NADSYN.PWY | treatment | lyPMA | 10.777628 | 0.0938815 |
| LIPASYN.PWY | treatment | lyPMA | 11.291335 | 0.0940850 |
| PWY.7204 | treatment | lyPMA | 12.252985 | 0.0940850 |
| PWY.5941 | treatment | lyPMA | 10.557637 | 0.0977698 |
Final results summary
Sequencing results
# Summary table: issues observed in the sequencing results for each
# host-depletion treatment (columns) in each sample type (rows).
# NA means nothing was entered for that treatment / sample-type combination.
# The table is built directly instead of filling an all-NA matrix scaffold;
# check.names = FALSE keeps the "Host zero" header with its space.
data.frame(
  lyPMA = rep("No increase in final reads", 3),
  Benzonase = rep("No decrease in host %", 3),
  `Host zero` = c(NA, NA, NA),
  Molysis = c("No decrease in host %",
              "High chance of failure in library prep",
              NA),
  QIAamp = c("No decrease in host %",
             NA,
             "No decrease in host %"),
  row.names = c("BAL", "Nasal", "Sputum"),
  check.names = FALSE,
  stringsAsFactors = FALSE
) %>%
  kbl(format = "html", caption = "Table of issues of each treatment method") %>%
  # full_width is a logical switch; FALSE keeps the table at its natural width.
  kable_styling(full_width = FALSE, html_font = "serif")
| lyPMA | Benzonase | Host zero | Molysis | QIAamp | |
|---|---|---|---|---|---|
| BAL | No increase in final reads | No decrease in host % | NA | No decrease in host % | No decrease in host % |
| Nasal | No increase in final reads | No decrease in host % | NA | High cahnge of failure in library pep | NA |
| Sputum | No increase in final reads | No decrease in host % | NA | NA | No decrease in host % |
Diversity changes (taxa)
# Summary table: taxonomic community changes recorded for each host-depletion
# treatment (columns) in each sample type (rows).
# NA means nothing was entered for that combination. A trailing "+" after an
# index name presumably denotes an increase -- confirm against the diversity
# sections of the report.
# check.names = FALSE keeps the "Host zero" header with its space.
data.frame(
  lyPMA = c(NA, "Beta changed", "Shannon +"),
  Benzonase = c(NA, NA, "Richness + InvSimp +"),
  `Host zero` = c(NA, "Richness + InvSimp +", NA),
  Molysis = c(NA, "Richness + InvSimp +", "Beta changed"),
  QIAamp = c("Beta changed", NA, "Beta changed"),
  row.names = c("BAL", "Nasal", "Sputum"),
  check.names = FALSE,
  stringsAsFactors = FALSE
) %>%
  kbl(format = "html", caption = "Table of community changes induced by each treatment method") %>%
  # full_width is a logical switch; FALSE keeps the table at its natural width.
  kable_styling(full_width = FALSE, html_font = "serif")
| lyPMA | Benzonase | Host zero | Molysis | QIAamp | |
|---|---|---|---|---|---|
| BAL | NA | NA | NA | NA | Beta changed |
| Nasal | Beta changed | NA | Richness + InvSimp + | Richness + InvSimp + | NA |
| Sputum | Shannon + | Richness + InvSimp + | NA | Beta changed | Beta changed |
Diversity changes (function)
# Summary table: functional diversity changes recorded for each host-depletion
# treatment (columns) in each sample type (rows).
# NA means nothing was entered for that combination. A trailing "+" after an
# index name presumably denotes an increase -- confirm against the diversity
# sections of the report.
# check.names = FALSE keeps the "Host zero" header with its space.
data.frame(
  lyPMA = c(NA, NA, "Shannon +"),
  Benzonase = c(NA, NA, "Shannon +"),
  `Host zero` = c(NA, "Richness +", "Shannon +"),
  Molysis = c(NA, "Richness + InvSimp + BPI +", "Shannon +"),
  QIAamp = c(NA, "Richness + Shannon +", "Shannon +"),
  row.names = c("BAL", "Nasal", "Sputum"),
  check.names = FALSE,
  stringsAsFactors = FALSE
) %>%
  kbl(format = "html", caption = "Table of functional diversity changes induced by each treatment method") %>%
  # full_width is a logical switch; FALSE keeps the table at its natural width.
  kable_styling(full_width = FALSE, html_font = "serif")
| lyPMA | Benzonase | Host zero | Molysis | QIAamp | |
|---|---|---|---|---|---|
| BAL | NA | NA | NA | NA | NA |
| Nasal | NA | NA | Richness + | Richness + InvSimp + BPI + | Richness + Shannon + |
| Sputum | Shannon + | Shannon + | Shannon + | Shannon + | Shannon + |
Potential contaminants
# Summary table: potential contaminant taxa listed for each host-depletion
# treatment (columns) in each sample type (rows).
# check.names = FALSE keeps the "Host zero" header with its space.
data.frame(
  lyPMA = c("Listeria",
            "Listeria",
            "Listeria, Candida, Corynebacterium"),
  Benzonase = c("Listeria",
                "Listeria",
                "Listeria, Candida, Corynebacterium"),
  `Host zero` = c("Listeria",
                  "Listeria",
                  "Listeria, Candida, Corynebacterium"),
  Molysis = c("Streptococcaceae, Listeria",
              "Streptococcaceae, Listeria",
              "Streptococcaceae, Listeria, Candida, Corynebacterium"),
  QIAamp = c("Listeria",
             "Listeria",
             "Listeria, Candida, Corynebacterium"),
  row.names = c("BAL", "Nasal", "Sputum"),
  check.names = FALSE,
  stringsAsFactors = FALSE
) %>%
  kbl(format = "html", caption = "Table of potential contaminants identified by decontam and DA analysis") %>%
  # full_width is a logical switch; FALSE keeps the table at its natural width.
  kable_styling(full_width = FALSE, html_font = "serif") %>%
  # Column 1 holds the row names, so 2:6 are the five treatment columns;
  # taxon names are set in italics. TRUE is spelled out because `T` is an
  # ordinary variable that can be reassigned.
  column_spec(2:6, italic = TRUE) #%>%
| lyPMA | Benzonase | Host zero | Molysis | QIAamp | |
|---|---|---|---|---|---|
| BAL | Listeria | Listeria | Listeria | Streptococcaceae, Listeria | Listeria |
| Nasal | Listeria | Listeria | Listeria | Streptococcaceae, Listeria | Listeria |
| Sputum | Listeria, Candida, Corynebacterium | Listeria, Candida, Corynebacterium | Listeria, Candida, Corynebacterium | Streptococcaceae, Listeria, Candida, Corynebacterium | Listeria, Candida, Corynebacterium |
#row_spec(2:3, bold = T)
Conclusion
1. Effect of treatment was sample type specific.
2. Some methods (lyPMA) caused samples to fail in library prep.
3. One BAL sample failed in sequencing, but most treatments enabled its sequencing.
4. Alpha diversity and beta diversity were changed by some treatments, and these changes were specific to certain sample types.
5. DA analysis and decontam showed there were some potential contaminants
QIAamp for Nasal, and Host zero for BAL and sputum, successfully 1) increased final reads, 2) lowered host %, and 3) did not change diversity matrices.
Molysis was effective in increasing the efficiency of sequencing sputum; however, diversity matrices were significantly changed.
As our study contains potential contaminants, further analysis is required after adding data of controls.
Done.
Bibliography
#===============================================================================
#BTC.LineZero.Footer.1.1.0
#===============================================================================
#R markdown citation generator.
#===============================================================================
#RLB.Dependencies:
# magrittr, pacman, stringr
#=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=
#BTC.Dependencies:
# LineZero.Header
#===============================================================================
#Generates citations for each explicitly loaded library.
#=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=
# Print one plain-text citation per line: R itself first, then every package
# named in `str_libraries`.
# unique() drops packages that are listed more than once, so each citation is
# printed a single time.
str_citations <- unique(c("r", str_libraries))
# A dedicated loop variable keeps `str_libraries` intact after the loop
# (iterating with the vector's own name overwrites it with its last element).
for (str_package in str_citations) {
  str_package |>
    pacman::p_citation() |>
    print(bibtex = FALSE) |>
    capture.output() %>%
    # Drop the first three captured lines, then any blank lines.
    .[-1:-3] %>%
    .[. != ""] |>
    # Join the wrapped lines first so that an emphasis marker opened on one
    # line and closed on the next is handled as a pair.
    paste(collapse = " ") |>
    stringr::str_squish() |>
    # Strip _..._ emphasis pairs (both underscores, every occurrence) while
    # leaving underscores inside words or URLs untouched.
    stringr::str_replace_all("(?<![[:alnum:]])_(.+?)_(?![[:alnum:]])", "\\1") |>
    cat()
  cat("\n")
}
## R Core Team (2022). R: A language and environment for statistical computing. R Foundation for Statistical Computing, Vienna, Austria. URL https://www.R-project.org/. We have invested a lot of time and effort in creating R, please cite it when using it for data analysis. See also 'citation("pkgname")' for citing R packages.
## Wickham H, Bryan J (2023). readxl: Read Excel Files_. R package version 1.4.2, <https://CRAN.R-project.org/package=readxl>.
## phyloseq: An R package for reproducible interactive analysis and graphics of microbiome census data. Paul J. McMurdie and Susan Holmes (2013) PLoS ONE 8(4):e61217.
## Wickham H, Averick M, Bryan J, Chang W, McGowan LD, François R, Grolemund G, Hayes A, Henry L, Hester J, Kuhn M, Pedersen TL, Miller E, Bache SM, Müller K, Ooms J, Robinson D, Seidel DP, Spinu V, Takahashi K, Vaughan D, Wilke C, Woo K, Yutani H (2019). "Welcome to the tidyverse." Journal of Open Source Software_, *4*(43), 1686. doi:10.21105/joss.01686 <https://doi.org/10.21105/joss.01686>.
## Rinker, T. W. & Kurkiewicz, D. (2017). pacman: Package Management for R. version 0.5.0. Buffalo, New York. http://github.com/trinker/pacman
## Garbett SP, Stephens J, Simonov K, Xie Y, Dong Z, Wickham H, Horner J, reikoch, Beasley W, O'Connor B, Warnes GR, Quinn M, Kamvar ZN (2023). yaml: Methods to Convert R Data to YAML and Back_. R package version 2.3.7, <https://CRAN.R-project.org/package=yaml>. ATTENTION: This citation information has been auto-generated from the package DESCRIPTION file and may need manual editing, see 'help("citation")'.
## H. Wickham. ggplot2: Elegant Graphics for Data Analysis. Springer-Verlag New York, 2016.
## Oksanen J, Simpson G, Blanchet F, Kindt R, Legendre P, Minchin P, O'Hara R, Solymos P, Stevens M, Szoecs E, Wagner H, Barbour M, Bedward M, Bolker B, Borcard D, Carvalho G, Chirico M, De Caceres M, Durand S, Evangelista H, FitzJohn R, Friendly M, Furneaux B, Hannigan G, Hill M, Lahti L, McGlinn D, Ouellette M, Ribeiro Cunha E, Smith T, Stier A, Ter Braak C, Weedon J (2022). vegan: Community Ecology Package. R package version 2.6-4, <https://CRAN.R-project.org/package=vegan>.
## Leo Lahti et al. microbiome R package. URL: http://microbiome.github.io
## Kassambara A (2023). ggpubr: 'ggplot2' Based Publication Ready Plots. R package version 0.6.0, <https://CRAN.R-project.org/package=ggpubr>.
## Simon Garnier, Noam Ross, Robert Rudis, Antônio P. Camargo, Marco Sciaini, and Cédric Scherer (2021). Rvision - Colorblind-Friendly Color Maps for R. R package version 0.6.2.
## Davis NM, Proctor D, Holmes SP, Relman DA, Callahan BJ (2017). "Simple statistical identification and removal of contaminant sequences in marker-gene and metagenomics data." bioRxiv_, 221499. doi:10.1101/221499 <https://doi.org/10.1101/221499>.
## Auguie B (2017). gridExtra: Miscellaneous Functions for "Grid" Graphics. R package version 2.3, <https://CRAN.R-project.org/package=gridExtra>.
## Kassambara A (2023). ggpubr: 'ggplot2' Based Publication Ready Plots. R package version 0.6.0, <https://CRAN.R-project.org/package=ggpubr>.
## Douglas Bates, Martin Maechler, Ben Bolker, Steve Walker (2015). Fitting Linear Mixed-Effects Models Using lme4. Journal of Statistical Software, 67(1), 1-48. doi:10.18637/jss.v067.i01.
## Kuznetsova A, Brockhoff PB, Christensen RHB (2017). "lmerTest Package: Tests in Linear Mixed Effects Models." Journal of Statistical Software, *82*(13), 1-26. doi:10.18637/jss.v082.i13 <https://doi.org/10.18637/jss.v082.i13>.
## Ooms J (2023). writexl: Export Data Frames to Excel 'xlsx' Format_. R package version 1.4.2, <https://CRAN.R-project.org/package=writexl>.
## Gonçalves da Silva A (2017). harrietr: Wrangle Phylogenetic Distance Matrices and Other Utilities. R package version 0.2.3, <https://CRAN.R-project.org/package=harrietr>.
## Mallick H et al. (2020). Multivariable Association in Population-scale Meta-omics Studies, http://huttenhower.sph.harvard.edu/maaslin2. To cite the MaAsLin 2 software, please use: Mallick H, Rahnavard A, McIver LJ (2020). MaAsLin 2: Multivariable Association in Population-scale Meta-omics Studies. R/Bioconductor package, http://huttenhower.sph.harvard.edu/maaslin2.
## Wilke C, Wiernik B (2022). ggtext: Improved Text Rendering Support for 'ggplot2'. R package version 0.1.2, <https://CRAN.R-project.org/package=ggtext>.
## Aphalo P (2022). ggpmisc: Miscellaneous Extensions to 'ggplot2'_. R package version 0.5.2, <https://CRAN.R-project.org/package=ggpmisc>.
## Auguie B (2017). gridExtra: Miscellaneous Functions for "Grid" Graphics. R package version 2.3, <https://CRAN.R-project.org/package=gridExtra>.
## Wood S, Scheipl F (2020). gamm4: Generalized Additive Mixed Models using 'mgcv' and 'lme4'. R package version 0.2-6, <https://CRAN.R-project.org/package=gamm4>. ATTENTION: This citation information has been auto-generated from the package DESCRIPTION file and may need manual editing, see 'help("citation")'.
## Hadley Wickham (2007). Reshaping Data with the reshape Package. Journal of Statistical Software, 21(12), 1-20. URL http://www.jstatsoft.org/v21/i12/.
## Zhu H (2021). kableExtra: Construct Complex Table with 'kable' and Pipe Syntax. R package version 1.3.4, <https://CRAN.R-project.org/package=kableExtra>.
## Yihui Xie (2023). knitr: A General-Purpose Package for Dynamic Report Generation in R. R package version 1.42. Yihui Xie (2015) Dynamic Documents with R and knitr. 2nd edition. Chapman and Hall/CRC. ISBN 978-1498716963 Yihui Xie (2014) knitr: A Comprehensive Tool for Reproducible Research in R. In Victoria Stodden, Friedrich Leisch and Roger D. Peng, editors, Implementing Reproducible Computational Research. Chapman and Hall/CRC. ISBN 978-1466561595
## Guangchuang Yu. (2022). Data Integration, Manipulation and Visualization of Phylogenetic Trees (1st edition). Chapman and Hall/CRC. Shuangbin Xu, Lin Li, Xiao Luo, Meijun Chen, Wenli Tang, Li Zhan, Zehan Dai, Tommy T. Lam, Yi Guan, Guangchuang Yu. Ggtree: A serialized data object for visualization of a phylogenetic tree and annotation data. iMeta 2022, 4(1):e56. doi:10.1002/imt2.56 Guangchuang Yu. Using ggtree to visualize data on tree-like structures. Current Protocols in Bioinformatics, 2020, 69:e96. doi: 10.1002/cpbi.96 Guangchuang Yu, Tommy Tsan-Yuk Lam, Huachen Zhu, Yi Guan. Two methods for mapping and visualizing associated data on phylogeny using ggtree. Molecular Biology and Evolution 2018, 35(2):3041-3043. doi: 10.1093/molbev/msy194 Guangchuang Yu, David Smith, Huachen Zhu, Yi Guan, Tommy Tsan-Yuk Lam. ggtree: an R package for visualization and annotation of phylogenetic trees with their covariates and other associated data. Methods in Ecology and Evolution 2017, 8(1):28-36. doi:10.1111/2041-210X.12628
## John Fox and Sanford Weisberg (2019). An {R} Companion to Applied Regression, Third Edition. Thousand Oaks CA: Sage. URL: https://socialsciences.mcmaster.ca/jfox/Books/Companion/
#===============================================================================